diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.vni16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.vni16.ll index 4a66116bb2b59..e7350fec10546 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.vni16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.vni16.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s -define void @add_v3i16(<3 x i16> addrspace(1)* %ptra, <3 x i16> addrspace(1)* %ptrb, <3 x i16> addrspace(1)* %ptr2) { +define void @add_v3i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v3i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -58,10 +58,10 @@ define void @add_v3i16(<3 x i16> addrspace(1)* %ptra, <3 x i16> addrspace(1)* %p ; GFX9-NEXT: global_store_short v[4:5], v2, off offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <3 x i16>, <3 x i16> addrspace(1)* %ptra, align 4 - %b = load <3 x i16>, <3 x i16> addrspace(1)* %ptrb, align 4 + %a = load <3 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <3 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <3 x i16> %a, %b - store <3 x i16> %add, <3 x i16> addrspace(1)* %ptr2, align 4 + store <3 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } @@ -85,7 +85,7 @@ define <3 x i16> @add_v3i16_arg(<3 x i16> %a, <3 x i16> %b) { ret <3 x i16> %add } -define void @add_v4i16(<4 x i16> addrspace(1)* %ptra, <4 x i16> addrspace(1)* %ptrb, <4 x i16> addrspace(1)* %ptr2) { +define void @add_v4i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v4i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -113,10 +113,10 @@ define void @add_v4i16(<4 x i16> addrspace(1)* %ptra, <4 x i16> addrspace(1)* %p ; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <4 x i16>, <4 x i16> addrspace(1)* %ptra, align 4 - %b = load <4 x i16>, <4 x i16> addrspace(1)* %ptrb, align 4 + %a = load <4 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <4 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <4 x i16> %a, %b - store <4 x i16> %add, <4 x i16> addrspace(1)* %ptr2, align 4 + store <4 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } @@ -142,7 +142,7 @@ define <4 x i16> @add_v4i16_arg(<4 x i16> %a, <4 x i16> %b) { ret <4 x i16> %add } -define void @add_v5i16(<5 x i16> addrspace(1)* %ptra, <5 x i16> addrspace(1)* %ptrb, <5 x i16> addrspace(1)* %ptr2) { +define void @add_v5i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v5i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -231,10 +231,10 @@ define void @add_v5i16(<5 x i16> addrspace(1)* %ptra, <5 x i16> addrspace(1)* %p ; GFX9-NEXT: global_store_short v[4:5], v6, off offset:8 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <5 x i16>, <5 x i16> addrspace(1)* %ptra, align 4 - %b = load <5 x i16>, <5 x i16> addrspace(1)* %ptrb, align 4 + %a = load <5 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <5 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <5 x i16> %a, %b - store <5 x i16> %add, <5 x i16> addrspace(1)* %ptr2, align 4 + store <5 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } @@ -262,7 +262,7 @@ define <5 x i16> @add_v5i16_arg(<5 x i16> %a, <5 x i16> %b) 
{ ret <5 x i16> %add } -define void @add_v6i16(<6 x i16> addrspace(1)* %ptra, <6 x i16> addrspace(1)* %ptrb, <6 x i16> addrspace(1)* %ptr2) { +define void @add_v6i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v6i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -294,10 +294,10 @@ define void @add_v6i16(<6 x i16> addrspace(1)* %ptra, <6 x i16> addrspace(1)* %p ; GFX9-NEXT: global_store_dwordx3 v[4:5], v[0:2], off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <6 x i16>, <6 x i16> addrspace(1)* %ptra, align 4 - %b = load <6 x i16>, <6 x i16> addrspace(1)* %ptrb, align 4 + %a = load <6 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <6 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <6 x i16> %a, %b - store <6 x i16> %add, <6 x i16> addrspace(1)* %ptr2, align 4 + store <6 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } @@ -327,7 +327,7 @@ define <6 x i16> @add_v6i16_arg(<6 x i16> %a, <6 x i16> %b) { ret <6 x i16> %add } -define void @addv_7i16(<7 x i16> addrspace(1)* %ptra, <7 x i16> addrspace(1)* %ptrb, <7 x i16> addrspace(1)* %ptr2) { +define void @addv_7i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: addv_7i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -449,10 +449,10 @@ define void @addv_7i16(<7 x i16> addrspace(1)* %ptra, <7 x i16> addrspace(1)* %p ; GFX9-NEXT: global_store_short v[4:5], v8, off offset:12 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <7 x i16>, <7 x i16> addrspace(1)* %ptra, align 4 - %b = load <7 x i16>, <7 x i16> addrspace(1)* %ptrb, align 4 + %a = load <7 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <7 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <7 x i16> %a, %b - store <7 x i16> %add, <7 x i16> addrspace(1)* %ptr2, align 4 + store <7 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } @@ -484,7 +484,7 @@ define <7 x i16> @add_v7i16_arg(<7 x i16> %a, <7 x i16> %b) { ret <7 x i16> %add } -define void @add_v9i16(<9 x i16> addrspace(1)* %ptra, <9 x i16> addrspace(1)* %ptrb, <9 x i16> addrspace(1)* %ptr2) { +define void @add_v9i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v9i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -536,10 +536,10 @@ define void @add_v9i16(<9 x i16> addrspace(1)* %ptra, <9 x i16> addrspace(1)* %p ; GFX9-NEXT: global_store_short v[4:5], v6, off offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <9 x i16>, <9 x i16> addrspace(1)* %ptra, align 4 - %b = load <9 x i16>, <9 x i16> addrspace(1)* %ptrb, align 4 + %a = load <9 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <9 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <9 x i16> %a, %b - store <9 x i16> %add, <9 x i16> addrspace(1)* %ptr2, align 4 + store <9 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } @@ -575,7 +575,7 @@ define <9 x i16> @add_v9i16_arg(<9 x i16> %a, <9 x i16> %b) { ret <9 x i16> %add } -define void @add_v10i16(<10 x i16> addrspace(1)* %ptra, <10 x i16> addrspace(1)* %ptrb, <10 x i16> addrspace(1)* %ptr2) { +define void @add_v10i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v10i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -629,14 +629,14 @@ define void @add_v10i16(<10 x i16> addrspace(1)* %ptra, <10 x i16> addrspace(1)* ; GFX9-NEXT: 
global_store_dword v[4:5], v6, off offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <10 x i16>, <10 x i16> addrspace(1)* %ptra, align 4 - %b = load <10 x i16>, <10 x i16> addrspace(1)* %ptrb, align 4 + %a = load <10 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <10 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <10 x i16> %a, %b - store <10 x i16> %add, <10 x i16> addrspace(1)* %ptr2, align 4 + store <10 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } -define void @add_v11i16(<11 x i16> addrspace(1)* %ptra, <11 x i16> addrspace(1)* %ptrb, <11 x i16> addrspace(1)* %ptr2) { +define void @add_v11i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v11i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -722,10 +722,10 @@ define void @add_v11i16(<11 x i16> addrspace(1)* %ptra, <11 x i16> addrspace(1)* ; GFX9-NEXT: global_store_short v[4:5], v6, off offset:20 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <11 x i16>, <11 x i16> addrspace(1)* %ptra, align 4 - %b = load <11 x i16>, <11 x i16> addrspace(1)* %ptrb, align 4 + %a = load <11 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <11 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <11 x i16> %a, %b - store <11 x i16> %add, <11 x i16> addrspace(1)* %ptr2, align 4 + store <11 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } @@ -765,7 +765,7 @@ define <11 x i16> @add_v11i16_arg(<11 x i16> %a, <11 x i16> %b) { ret <11 x i16> %add } -define void @add_v12i16(<12 x i16> addrspace(1)* %ptra, <12 x i16> addrspace(1)* %ptrb, <12 x i16> addrspace(1)* %ptr2) { +define void @add_v12i16(ptr addrspace(1) %ptra, ptr addrspace(1) %ptrb, ptr addrspace(1) %ptr2) { ; GFX8-LABEL: add_v12i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -823,10 +823,10 @@ define void @add_v12i16(<12 x i16> addrspace(1)* %ptra, <12 x i16> addrspace(1)* ; GFX9-NEXT: global_store_dwordx2 v[4:5], v[6:7], off offset:16 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - %a = load <12 x i16>, <12 x i16> addrspace(1)* %ptra, align 4 - %b = load <12 x i16>, <12 x i16> addrspace(1)* %ptrb, align 4 + %a = load <12 x i16>, ptr addrspace(1) %ptra, align 4 + %b = load <12 x i16>, ptr addrspace(1) %ptrb, align 4 %add = add <12 x i16> %a, %b - store <12 x i16> %add, <12 x i16> addrspace(1)* %ptr2, align 4 + store <12 x i16> %add, ptr addrspace(1) %ptr2, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll index 05cbd6f9942bf..5c451bffc2278 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/amdgpu-irtranslator.ll @@ -8,6 +8,6 @@ ; CHECK: {{%[0-9]+}}:_(s32) = G_ADD define amdgpu_kernel void @addi32(i32 %arg1, i32 %arg2) { %res = add i32 %arg1, %arg2 - store i32 %res, i32 addrspace(1)* undef + store i32 %res, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll index 031925033ec3c..ec5c224cb1b28 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck %s -declare hidden i32 addrspace(1)* 
@ext(i8 addrspace(1)*) +declare hidden ptr addrspace(1) @ext(ptr addrspace(1)) -define i32 addrspace(1)* @call_assert_align() { +define ptr addrspace(1) @call_assert_align() { ; CHECK-LABEL: call_assert_align: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -34,12 +34,12 @@ define i32 addrspace(1)* @call_assert_align() { ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: - %call = call align 4 i32 addrspace(1)* @ext(i8 addrspace(1)* null) - store volatile i32 0, i32 addrspace(1)* %call - ret i32 addrspace(1)* %call + %call = call align 4 ptr addrspace(1) @ext(ptr addrspace(1) null) + store volatile i32 0, ptr addrspace(1) %call + ret ptr addrspace(1) %call } -define i32 addrspace(1)* @tail_call_assert_align() { +define ptr addrspace(1) @tail_call_assert_align() { ; CHECK-LABEL: tail_call_assert_align: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -50,6 +50,6 @@ define i32 addrspace(1)* @tail_call_assert_align() { ; CHECK-NEXT: s_addc_u32 s17, s17, ext@rel32@hi+12 ; CHECK-NEXT: s_setpc_b64 s[16:17] entry: - %call = tail call align 4 i32 addrspace(1)* @ext(i8 addrspace(1)* null) - ret i32 addrspace(1)* %call + %call = tail call align 4 ptr addrspace(1) @ext(ptr addrspace(1) null) + ret ptr addrspace(1) %call } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local.ll index 1054bdcafd423..4618fc9fdce2d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_load_local.ll @@ -8,8 +8,8 @@ ; GCN-NEXT: ds_read_u8 v0, v0{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i8 @atomic_load_monotonic_i8(i8 addrspace(3)* %ptr) { - %load = load atomic i8, i8 addrspace(3)* %ptr monotonic, align 1 +define i8 @atomic_load_monotonic_i8(ptr addrspace(3) %ptr) { + %load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1 ret i8 %load } @@ -20,9 +20,9 @@ define i8 @atomic_load_monotonic_i8(i8 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_u8 v0, v0 offset:16{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i8 @atomic_load_monotonic_i8_offset(i8 addrspace(3)* %ptr) { - %gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16 - %load = load atomic i8, i8 addrspace(3)* %gep monotonic, align 1 +define i8 @atomic_load_monotonic_i8_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16 + %load = load atomic i8, ptr addrspace(3) %gep monotonic, align 1 ret i8 %load } @@ -33,8 +33,8 @@ define i8 @atomic_load_monotonic_i8_offset(i8 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_u16 v0, v0{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i16 @atomic_load_monotonic_i16(i16 addrspace(3)* %ptr) { - %load = load atomic i16, i16 addrspace(3)* %ptr monotonic, align 2 +define i16 @atomic_load_monotonic_i16(ptr addrspace(3) %ptr) { + %load = load atomic i16, ptr addrspace(3) %ptr monotonic, align 2 ret i16 %load } @@ -45,9 +45,9 @@ define i16 @atomic_load_monotonic_i16(i16 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_u16 v0, v0 offset:32{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i16 @atomic_load_monotonic_i16_offset(i16 addrspace(3)* %ptr) { - %gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16 - %load = load atomic i16, i16 addrspace(3)* %gep monotonic, align 2 +define i16 @atomic_load_monotonic_i16_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr 
inbounds i16, ptr addrspace(3) %ptr, i16 16 + %load = load atomic i16, ptr addrspace(3) %gep monotonic, align 2 ret i16 %load } @@ -58,8 +58,8 @@ define i16 @atomic_load_monotonic_i16_offset(i16 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b32 v0, v0{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i32 @atomic_load_monotonic_i32(i32 addrspace(3)* %ptr) { - %load = load atomic i32, i32 addrspace(3)* %ptr monotonic, align 4 +define i32 @atomic_load_monotonic_i32(ptr addrspace(3) %ptr) { + %load = load atomic i32, ptr addrspace(3) %ptr monotonic, align 4 ret i32 %load } @@ -70,9 +70,9 @@ define i32 @atomic_load_monotonic_i32(i32 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i32 @atomic_load_monotonic_i32_offset(i32 addrspace(3)* %ptr) { - %gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16 - %load = load atomic i32, i32 addrspace(3)* %gep monotonic, align 4 +define i32 @atomic_load_monotonic_i32_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds i32, ptr addrspace(3) %ptr, i32 16 + %load = load atomic i32, ptr addrspace(3) %gep monotonic, align 4 ret i32 %load } @@ -83,8 +83,8 @@ define i32 @atomic_load_monotonic_i32_offset(i32 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b64 v[0:1], v0{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i64 @atomic_load_monotonic_i64(i64 addrspace(3)* %ptr) { - %load = load atomic i64, i64 addrspace(3)* %ptr monotonic, align 8 +define i64 @atomic_load_monotonic_i64(ptr addrspace(3) %ptr) { + %load = load atomic i64, ptr addrspace(3) %ptr monotonic, align 8 ret i64 %load } @@ -95,9 +95,9 @@ define i64 @atomic_load_monotonic_i64(i64 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i64 @atomic_load_monotonic_i64_offset(i64 addrspace(3)* %ptr) { - %gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i32 16 - %load = load atomic i64, i64 addrspace(3)* %gep monotonic, align 8 +define i64 @atomic_load_monotonic_i64_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds i64, ptr addrspace(3) %ptr, i32 16 + %load = load atomic i64, ptr addrspace(3) %gep monotonic, align 8 ret i64 %load } @@ -108,9 +108,9 @@ define i64 @atomic_load_monotonic_i64_offset(i64 addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define float @atomic_load_monotonic_f32_offset(float addrspace(3)* %ptr) { - %gep = getelementptr inbounds float, float addrspace(3)* %ptr, i32 16 - %load = load atomic float, float addrspace(3)* %gep monotonic, align 4 +define float @atomic_load_monotonic_f32_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds float, ptr addrspace(3) %ptr, i32 16 + %load = load atomic float, ptr addrspace(3) %gep monotonic, align 4 ret float %load } @@ -121,9 +121,9 @@ define float @atomic_load_monotonic_f32_offset(float addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define double @atomic_load_monotonic_f64_offset(double addrspace(3)* %ptr) { - %gep = getelementptr inbounds double, double addrspace(3)* %ptr, i32 16 - %load = load atomic double, double addrspace(3)* %gep monotonic, align 8 +define double @atomic_load_monotonic_f64_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds double, ptr addrspace(3) %ptr, i32 16 + %load = load atomic double, ptr 
addrspace(3) %gep monotonic, align 8 ret double %load } @@ -134,10 +134,10 @@ define double @atomic_load_monotonic_f64_offset(double addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b64 v[0:1], v0 offset:128{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i8* @atomic_load_monotonic_p0i8_offset(i8* addrspace(3)* %ptr) { - %gep = getelementptr inbounds i8*, i8* addrspace(3)* %ptr, i32 16 - %load = load atomic i8*, i8* addrspace(3)* %gep monotonic, align 8 - ret i8* %load +define ptr @atomic_load_monotonic_p0i8_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds ptr, ptr addrspace(3) %ptr, i32 16 + %load = load atomic ptr, ptr addrspace(3) %gep monotonic, align 8 + ret ptr %load } ; GCN-LABEL: {{^}}atomic_load_monotonic_p3i8_offset: @@ -147,8 +147,8 @@ define i8* @atomic_load_monotonic_p0i8_offset(i8* addrspace(3)* %ptr) { ; GCN-NEXT: ds_read_b32 v0, v0 offset:64{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define i8 addrspace(3)* @atomic_load_monotonic_p3i8_offset(i8 addrspace(3)* addrspace(3)* %ptr) { - %gep = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %ptr, i32 16 - %load = load atomic i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %gep monotonic, align 4 - ret i8 addrspace(3)* %load +define ptr addrspace(3) @atomic_load_monotonic_p3i8_offset(ptr addrspace(3) %ptr) { + %gep = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %ptr, i32 16 + %load = load atomic ptr addrspace(3), ptr addrspace(3) %gep monotonic, align 4 + ret ptr addrspace(3) %load } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_store_local.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_store_local.ll index 63187c24c6333..0492985215b48 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_store_local.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_store_local.ll @@ -8,8 +8,8 @@ ; GCN-NEXT: ds_write_b8 v0, v1{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_i8(i8 addrspace(3)* %ptr, i8 %val) { - store atomic i8 %val, i8 addrspace(3)* %ptr monotonic, align 1 +define void @atomic_store_monotonic_i8(ptr addrspace(3) %ptr, i8 %val) { + store atomic i8 %val, ptr addrspace(3) %ptr monotonic, align 1 ret void } @@ -20,9 +20,9 @@ define void @atomic_store_monotonic_i8(i8 addrspace(3)* %ptr, i8 %val) { ; GCN-NEXT: ds_write_b8 v0, v1 offset:16{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_offset_i8(i8 addrspace(3)* %ptr, i8 %val) { - %gep = getelementptr inbounds i8, i8 addrspace(3)* %ptr, i8 16 - store atomic i8 %val, i8 addrspace(3)* %gep monotonic, align 1 +define void @atomic_store_monotonic_offset_i8(ptr addrspace(3) %ptr, i8 %val) { + %gep = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16 + store atomic i8 %val, ptr addrspace(3) %gep monotonic, align 1 ret void } @@ -33,8 +33,8 @@ define void @atomic_store_monotonic_offset_i8(i8 addrspace(3)* %ptr, i8 %val) { ; GCN-NEXT: ds_write_b16 v0, v1{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_i16(i16 addrspace(3)* %ptr, i16 %val) { - store atomic i16 %val, i16 addrspace(3)* %ptr monotonic, align 2 +define void @atomic_store_monotonic_i16(ptr addrspace(3) %ptr, i16 %val) { + store atomic i16 %val, ptr addrspace(3) %ptr monotonic, align 2 ret void } @@ -45,9 +45,9 @@ define void @atomic_store_monotonic_i16(i16 addrspace(3)* %ptr, i16 %val) { ; GCN-NEXT: ds_write_b16 v0, v1 offset:32{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; 
GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_offset_i16(i16 addrspace(3)* %ptr, i16 %val) { - %gep = getelementptr inbounds i16, i16 addrspace(3)* %ptr, i16 16 - store atomic i16 %val, i16 addrspace(3)* %gep monotonic, align 2 +define void @atomic_store_monotonic_offset_i16(ptr addrspace(3) %ptr, i16 %val) { + %gep = getelementptr inbounds i16, ptr addrspace(3) %ptr, i16 16 + store atomic i16 %val, ptr addrspace(3) %gep monotonic, align 2 ret void } @@ -58,8 +58,8 @@ define void @atomic_store_monotonic_offset_i16(i16 addrspace(3)* %ptr, i16 %val) ; GCN-NEXT: ds_write_b32 v0, v1{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_i32(i32 addrspace(3)* %ptr, i32 %val) { - store atomic i32 %val, i32 addrspace(3)* %ptr monotonic, align 4 +define void @atomic_store_monotonic_i32(ptr addrspace(3) %ptr, i32 %val) { + store atomic i32 %val, ptr addrspace(3) %ptr monotonic, align 4 ret void } @@ -70,9 +70,9 @@ define void @atomic_store_monotonic_i32(i32 addrspace(3)* %ptr, i32 %val) { ; GCN-NEXT: ds_write_b32 v0, v1 offset:64{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_offset_i32(i32 addrspace(3)* %ptr, i32 %val) { - %gep = getelementptr inbounds i32, i32 addrspace(3)* %ptr, i32 16 - store atomic i32 %val, i32 addrspace(3)* %gep monotonic, align 4 +define void @atomic_store_monotonic_offset_i32(ptr addrspace(3) %ptr, i32 %val) { + %gep = getelementptr inbounds i32, ptr addrspace(3) %ptr, i32 16 + store atomic i32 %val, ptr addrspace(3) %gep monotonic, align 4 ret void } @@ -83,8 +83,8 @@ define void @atomic_store_monotonic_offset_i32(i32 addrspace(3)* %ptr, i32 %val) ; GCN-NEXT: ds_write_b64 v0, v[1:2]{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_i64(i64 addrspace(3)* %ptr, i64 %val) { - store atomic i64 %val, i64 addrspace(3)* %ptr monotonic, align 8 +define void @atomic_store_monotonic_i64(ptr addrspace(3) %ptr, i64 %val) { + store atomic i64 %val, ptr addrspace(3) %ptr monotonic, align 8 ret void } @@ -95,9 +95,9 @@ define void @atomic_store_monotonic_i64(i64 addrspace(3)* %ptr, i64 %val) { ; GCN-NEXT: ds_write_b64 v0, v[1:2] offset:128{{$}} ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_setpc_b64 -define void @atomic_store_monotonic_offset_i64(i64 addrspace(3)* %ptr, i64 %val) { - %gep = getelementptr inbounds i64, i64 addrspace(3)* %ptr, i64 16 - store atomic i64 %val, i64 addrspace(3)* %gep monotonic, align 8 +define void @atomic_store_monotonic_offset_i64(ptr addrspace(3) %ptr, i64 %val) { + %gep = getelementptr inbounds i64, ptr addrspace(3) %ptr, i64 16 + store atomic i64 %val, ptr addrspace(3) %gep monotonic, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll index cfc58aaae5be6..9fd265394e3c4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bool-legalization.ll @@ -77,11 +77,11 @@ entry: br i1 %trunc, label %bb0, label %bb1 bb0: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef unreachable bb1: - store volatile i32 1, i32 addrspace(1)* undef + store volatile i32 1, ptr addrspace(1) undef unreachable } @@ -118,10 +118,10 @@ entry: br i1 %and, label %bb0, label %bb1 bb0: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef unreachable bb1: - store volatile i32 1, i32 addrspace(1)* 
undef + store volatile i32 1, ptr addrspace(1) undef unreachable } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll index 35932d42a0646..44c3c3d333267 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s -define void @value_finder_bug(<2 x float> addrspace(5)* %store_ptr, <4 x float> addrspace(4)* %ptr) { +define void @value_finder_bug(ptr addrspace(5) %store_ptr, ptr addrspace(4) %ptr) { ; GFX10-LABEL: value_finder_bug: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -12,10 +12,10 @@ define void @value_finder_bug(<2 x float> addrspace(5)* %store_ptr, <4 x float> ; GFX10-NEXT: buffer_store_dword v4, v0, s[0:3], 0 offen offset:4 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX10-NEXT: s_setpc_b64 s[30:31] - %vec = load <4 x float>, <4 x float> addrspace(4)* %ptr, align 4 + %vec = load <4 x float>, ptr addrspace(4) %ptr, align 4 %vec.3 = extractelement <4 x float> %vec, i32 3 %shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32> %new_vec = insertelement <2 x float> %shuffle, float %vec.3, i32 1 - store <2 x float> %new_vec, <2 x float> addrspace(5)* %store_ptr, align 8 + store <2 x float> %new_vec, ptr addrspace(5) %store_ptr, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll index a607ccb6946a0..37592a7f99ee9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll @@ -7,7 +7,7 @@ ; tests are unified. 
declare hidden void @external_void_func_v16i32_v16i32_v4i32(<16 x i32>, <16 x i32>, <4 x i32>) #0 -declare hidden void @external_void_func_byval([16 x i32] addrspace(5)* byval([16 x i32])) #0 +declare hidden void @external_void_func_byval(ptr addrspace(5) byval([16 x i32])) #0 define amdgpu_kernel void @kernel_caller_stack() { ; MUBUF-LABEL: kernel_caller_stack: @@ -173,6 +173,7 @@ define amdgpu_kernel void @kernel_caller_byval() { ; FLATSCR-NEXT: s_mov_b32 s2, 0 ; FLATSCR-NEXT: s_mov_b32 vcc_lo, 0 ; FLATSCR-NEXT: s_mov_b32 vcc_hi, 0 +; FLATSCR-NEXT: s_mov_b32 s40, 0 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s11 offset:24 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s10 offset:32 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s9 offset:40 @@ -187,7 +188,6 @@ define amdgpu_kernel void @kernel_caller_byval() { ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], s2 offset:112 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_lo offset:120 ; FLATSCR-NEXT: scratch_store_dwordx2 off, v[0:1], vcc_hi offset:128 -; FLATSCR-NEXT: s_mov_b32 s40, 0 ; FLATSCR-NEXT: scratch_load_dwordx2 v[0:1], off, s40 offset:8 ; FLATSCR-NEXT: s_mov_b32 s39, 0 ; FLATSCR-NEXT: scratch_load_dwordx2 v[2:3], off, s39 offset:16 @@ -226,9 +226,8 @@ define amdgpu_kernel void @kernel_caller_byval() { ; FLATSCR-NEXT: s_swappc_b64 s[30:31], s[0:1] ; FLATSCR-NEXT: s_endpgm %alloca = alloca [16 x i32], align 4, addrspace(5) - %cast = bitcast [16 x i32] addrspace(5)* %alloca to i8 addrspace(5)* - call void @llvm.memset.p5i8.i32(i8 addrspace(5)* align 4 %cast, i8 0, i32 128, i1 false) - call void @external_void_func_byval([16 x i32] addrspace(5)* byval([16 x i32]) %alloca) + call void @llvm.memset.p5.i32(ptr addrspace(5) align 4 %alloca, i8 0, i32 128, i1 false) + call void @external_void_func_byval(ptr addrspace(5) byval([16 x i32]) %alloca) ret void } @@ -302,7 +301,7 @@ define void @func_caller_stack() { ret void } -define void @func_caller_byval([16 x i32] addrspace(5)* %argptr) { +define void @func_caller_byval(ptr addrspace(5) %argptr) { ; MUBUF-LABEL: func_caller_byval: ; MUBUF: ; %bb.0: ; MUBUF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -431,12 +430,11 @@ define void @func_caller_byval([16 x i32] addrspace(5)* %argptr) { ; FLATSCR-NEXT: s_mov_b64 exec, s[0:1] ; FLATSCR-NEXT: s_waitcnt vmcnt(0) ; FLATSCR-NEXT: s_setpc_b64 s[30:31] - %cast = bitcast [16 x i32] addrspace(5)* %argptr to i8 addrspace(5)* - call void @external_void_func_byval([16 x i32] addrspace(5)* byval([16 x i32]) %argptr) + call void @external_void_func_byval(ptr addrspace(5) byval([16 x i32]) %argptr) ret void } -declare void @llvm.memset.p5i8.i32(i8 addrspace(5)* nocapture writeonly, i8, i32, i1 immarg) #1 +declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32, i1 immarg) #1 attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } attributes #1 = { argmemonly nofree nounwind willreturn writeonly } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll index e39c3e9d3339a..cbbf9d17b3dc6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll @@ -128,7 +128,7 @@ define float @test_f32_add_mul_rhs(float %x, float %y, float 
%z) { ret float %b } -define float @test_add_mul_multiple_defs_z(float %x, float %y, <2 x float> addrspace(1)* %vec_ptr) { +define float @test_add_mul_multiple_defs_z(float %x, float %y, ptr addrspace(1) %vec_ptr) { ; GFX9-LABEL: test_add_mul_multiple_defs_z: ; GFX9: ; %bb.0: ; %.entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -204,13 +204,13 @@ define float @test_add_mul_multiple_defs_z(float %x, float %y, <2 x float> addrs ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: %a = fmul float %x, %y - %vec = load <2 x float>, <2 x float> addrspace(1)* %vec_ptr + %vec = load <2 x float>, ptr addrspace(1) %vec_ptr %z = extractelement <2 x float> %vec, i64 1 %b = fadd float %a, %z ret float %b } -define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, <2 x float> addrspace(1)* %vec_ptr) { +define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace(1) %vec_ptr) { ; GFX9-LABEL: test_add_mul_rhs_multiple_defs_z: ; GFX9: ; %bb.0: ; %.entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -286,7 +286,7 @@ define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, <2 x float> a ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: %a = fmul float %x, %y - %vec = load <2 x float>, <2 x float> addrspace(1)* %vec_ptr + %vec = load <2 x float>, ptr addrspace(1) %vec_ptr %z = extractelement <2 x float> %vec, i64 1 %b = fadd float %z, %a ret float %b diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combiner-crash.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combiner-crash.ll index b337a37dae99f..356ef52bf21b6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combiner-crash.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combiner-crash.ll @@ -3,6 +3,6 @@ define amdgpu_kernel void @test_long_add4() { entry: %add = add <4 x i64> zeroinitializer, zeroinitializer - store <4 x i64> %add, <4 x i64> addrspace(1)* null, align 32 + store <4 x i64> %add, ptr addrspace(1) null, align 32 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll index 827574baafa79..9247eca4b6754 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/crash-stack-address-O0.ll @@ -20,6 +20,6 @@ define amdgpu_kernel void @stack_write_fi() { ; CHECK-NEXT: s_endpgm entry: %alloca = alloca i64, align 4, addrspace(5) - store volatile i64 0, i64 addrspace(5)* %alloca, align 4 + store volatile i64 0, ptr addrspace(5) %alloca, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll index c8eb1a4acc351..58a1adaafd0b6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll @@ -96,7 +96,7 @@ define float @v_uitofp_to_f32_multi_use_lshr8_mask255(i32 %arg0) nounwind { ; VI-NEXT: s_waitcnt vmcnt(0) ; VI-NEXT: s_setpc_b64 s[30:31] %lshr.8 = lshr i32 %arg0, 8 - store i32 %lshr.8, i32 addrspace(1)* undef + store i32 %lshr.8, ptr addrspace(1) undef %masked = and i32 %lshr.8, 255 %cvt = uitofp i32 %masked to float ret float %cvt @@ -449,7 +449,7 @@ define double @v_uitofp_i8_to_f64(i8 %arg0) nounwind { ret double %cvt } -define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_i8_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_i8_to_f32: ; SI: ; %bb.0: ; SI-NEXT: 
s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -483,14 +483,14 @@ define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 a ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid - %load = load i8, i8 addrspace(1)* %gep, align 1 + %gep = getelementptr i8, ptr addrspace(1) %in, i32 %tid + %load = load i8, ptr addrspace(1) %gep, align 1 %cvt = uitofp i8 %load to float - store float %cvt, float addrspace(1)* %out, align 4 + store float %cvt, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v2i8_to_v2f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v2i8_to_v2f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -529,14 +529,14 @@ define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <2 x i8>, <2 x i8> addrspace(1)* %in, i32 %tid - %load = load <2 x i8>, <2 x i8> addrspace(1)* %gep, align 2 + %gep = getelementptr <2 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <2 x i8>, ptr addrspace(1) %gep, align 2 %cvt = uitofp <2 x i8> %load to <2 x float> - store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16 + store <2 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v3i8_to_v3f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v3i8_to_v3f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -579,14 +579,14 @@ define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias ; VI-NEXT: flat_store_dwordx3 v[3:4], v[0:2] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %in, i32 %tid - %load = load <3 x i8>, <3 x i8> addrspace(1)* %gep, align 4 + %gep = getelementptr <3 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <3 x i8>, ptr addrspace(1) %gep, align 4 %cvt = uitofp <3 x i8> %load to <3 x float> - store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16 + store <3 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v4i8_to_v4f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v4i8_to_v4f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -630,10 +630,10 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid - %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 4 + %gep = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <4 x i8>, ptr addrspace(1) %gep, align 4 %cvt = uitofp <4 x i8> %load to <4 x float> - store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %cvt, ptr addrspace(1) 
%out, align 16 ret void } @@ -641,7 +641,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias ; position in the word for the component. ; FIXME: Packing bytes -define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v4i8_to_v4f32_unaligned: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -715,14 +715,14 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1) ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid - %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1 + %gep = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <4 x i8>, ptr addrspace(1) %gep, align 1 %cvt = uitofp <4 x i8> %load to <4 x float> - store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %out2, <4 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %out2, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v4i8_to_v4f32_2_uses: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd @@ -808,16 +808,16 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid.x = call i32 @llvm.amdgcn.workitem.id.x() - %in.ptr = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid.x - %load = load <4 x i8>, <4 x i8> addrspace(1)* %in.ptr, align 4 + %in.ptr = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid.x + %load = load <4 x i8>, ptr addrspace(1) %in.ptr, align 4 %cvt = uitofp <4 x i8> %load to <4 x float> - store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %cvt, ptr addrspace(1) %out, align 16 %add = add <4 x i8> %load, ; Second use of %load - store <4 x i8> %add, <4 x i8> addrspace(1)* %out2, align 4 + store <4 x i8> %add, ptr addrspace(1) %out2, align 4 ret void } -define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v7i8_to_v7f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v7i8_to_v7f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -907,14 +907,14 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias ; VI-NEXT: flat_store_dwordx3 v[9:10], v[4:6] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <7 x i8>, <7 x i8> addrspace(1)* %in, i32 %tid - %load = load <7 x i8>, <7 x i8> addrspace(1)* %gep, align 1 + %gep = getelementptr <7 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <7 x i8>, ptr addrspace(1) %gep, align 1 %cvt = uitofp <7 x i8> %load to <7 x float> - store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16 + store <7 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x 
i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @load_v8i8_to_v8f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: load_v8i8_to_v8f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -975,14 +975,14 @@ define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias ; VI-NEXT: flat_store_dwordx4 v[10:11], v[4:7] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %in, i32 %tid - %load = load <8 x i8>, <8 x i8> addrspace(1)* %gep, align 8 + %gep = getelementptr <8 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <8 x i8>, ptr addrspace(1) %gep, align 8 %cvt = uitofp <8 x i8> %load to <8 x float> - store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16 + store <8 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: i8_zext_inreg_i32_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1020,16 +1020,16 @@ define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %load = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %load = load i32, ptr addrspace(1) %gep, align 4 %add = add i32 %load, 2 %inreg = and i32 %add, 255 %cvt = uitofp i32 %inreg to float - store float %cvt, float addrspace(1)* %out, align 4 + store float %cvt, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: i8_zext_inreg_hi1_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1065,18 +1065,18 @@ define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %load = load i32, i32 addrspace(1)* %gep, align 4 + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %load = load i32, ptr addrspace(1) %gep, align 4 %inreg = and i32 %load, 65280 %shr = lshr i32 %inreg, 8 %cvt = uitofp i32 %shr to float - store float %cvt, float addrspace(1)* %out, align 4 + store float %cvt, ptr addrspace(1) %out, align 4 ret void } ; We don't get these ones because of the zext, but instcombine removes ; them so it shouldn't really matter. 
-define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @i8_zext_i32_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: i8_zext_i32_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1110,15 +1110,15 @@ define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid - %load = load i8, i8 addrspace(1)* %gep, align 1 + %gep = getelementptr i8, ptr addrspace(1) %in, i32 %tid + %load = load i8, ptr addrspace(1) %gep, align 1 %ext = zext i8 %load to i32 %cvt = uitofp i32 %ext to float - store float %cvt, float addrspace(1)* %out, align 4 + store float %cvt, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: v4i8_zext_v4i32_to_v4f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1192,15 +1192,15 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* no ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid - %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1 + %gep = getelementptr <4 x i8>, ptr addrspace(1) %in, i32 %tid + %load = load <4 x i8>, ptr addrspace(1) %gep, align 1 %ext = zext <4 x i8> %load to <4 x i32> %cvt = uitofp <4 x i32> %ext to <4 x float> - store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %cvt, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @extract_byte0_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: extract_byte0_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1235,15 +1235,15 @@ define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %gep %and = and i32 %val, 255 %cvt = uitofp i32 %and to float - store float %cvt, float addrspace(1)* %out + store float %cvt, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @extract_byte1_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: extract_byte1_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1279,16 +1279,16 @@ define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %gep + %gep = 
getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %gep %srl = lshr i32 %val, 8 %and = and i32 %srl, 255 %cvt = uitofp i32 %and to float - store float %cvt, float addrspace(1)* %out + store float %cvt, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @extract_byte2_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: extract_byte2_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1324,16 +1324,16 @@ define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %gep %srl = lshr i32 %val, 16 %and = and i32 %srl, 255 %cvt = uitofp i32 %and to float - store float %cvt, float addrspace(1)* %out + store float %cvt, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { +define amdgpu_kernel void @extract_byte3_to_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind { ; SI-LABEL: extract_byte3_to_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1368,16 +1368,16 @@ define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid - %val = load i32, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %in, i32 %tid + %val = load i32, ptr addrspace(1) %gep %srl = lshr i32 %val, 24 %and = and i32 %srl, 255 %cvt = uitofp i32 %and to float - store float %cvt, float addrspace(1)* %out + store float %cvt, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @cvt_ubyte0_or_multiuse(i32 addrspace(1)* %in, float addrspace(1)* %out) { +define amdgpu_kernel void @cvt_ubyte0_or_multiuse(ptr addrspace(1) %in, ptr addrspace(1) %out) { ; SI-LABEL: cvt_ubyte0_or_multiuse: ; SI: ; %bb.0: ; %bb ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1418,14 +1418,14 @@ define amdgpu_kernel void @cvt_ubyte0_or_multiuse(i32 addrspace(1)* %in, float a ; VI-NEXT: s_endpgm bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %lid - %load = load i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %lid + %load = load i32, ptr addrspace(1) %gep %or = or i32 %load, -2147483647 %and = and i32 %or, 255 %uitofp = uitofp i32 %and to float %cast = bitcast i32 %or to float %add = fadd float %cast, %uitofp - store float %add, float addrspace(1)* %out + store float %add, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll index af883e8d6a91c..25e2267fdee89 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll @@ -23,7 +23,7 @@ entry: br i1 %c, label %if.true, label %endif if.true: - %val = load volatile i32, i32 addrspace(1)* undef + %val = load 
volatile i32, ptr addrspace(1) undef br label %endif endif: @@ -55,7 +55,7 @@ endif: ret i32 %v if.true: - %val = load volatile i32, i32 addrspace(1)* undef + %val = load volatile i32, ptr addrspace(1) undef br label %endif } @@ -81,7 +81,7 @@ entry: br i1 %c, label %if.true, label %endif if.true: - %val = load volatile i32, i32 addrspace(1)* undef + %val = load volatile i32, ptr addrspace(1) undef br label %endif endif: @@ -90,7 +90,7 @@ endif: } ; Make sure and 1 is inserted on llvm.amdgcn.if -define i32 @divergent_if_nonboolean_condition1(i32 addrspace(1)* %ptr) { +define i32 @divergent_if_nonboolean_condition1(ptr addrspace(1) %ptr) { ; CHECK-LABEL: divergent_if_nonboolean_condition1: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -109,12 +109,12 @@ define i32 @divergent_if_nonboolean_condition1(i32 addrspace(1)* %ptr) { ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: - %value = load i32, i32 addrspace(1)* %ptr + %value = load i32, ptr addrspace(1) %ptr %c = trunc i32 %value to i1 br i1 %c, label %if.true, label %endif if.true: - %val = load volatile i32, i32 addrspace(1)* undef + %val = load volatile i32, ptr addrspace(1) undef br label %endif endif: @@ -123,7 +123,7 @@ endif: } @external_constant = external addrspace(4) constant i32, align 4 -@const.ptr = external addrspace(4) constant float*, align 4 +@const.ptr = external addrspace(4) constant ptr, align 4 ; Make sure this case compiles. G_ICMP was mis-mapped due to having ; the result register class constrained by llvm.amdgcn.if lowering. @@ -167,13 +167,13 @@ define void @constrained_if_register_class() { ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] bb: - %tmp = load i32, i32 addrspace(4)* @external_constant + %tmp = load i32, ptr addrspace(4) @external_constant %tmp1 = icmp ne i32 %tmp, 0 br i1 %tmp1, label %bb12, label %bb2 bb2: - %ptr = load float*, float* addrspace(4)* @const.ptr - %tmp4 = load float, float* %ptr, align 4 + %ptr = load ptr, ptr addrspace(4) @const.ptr + %tmp4 = load float, ptr %ptr, align 4 %tmp5 = fcmp olt float %tmp4, 1.0 %tmp6 = or i1 %tmp5, false br i1 %tmp6, label %bb8, label %bb7 @@ -187,7 +187,7 @@ bb8: br i1 %tmp10, label %bb11, label %bb12 bb11: - store float 4.0, float addrspace(5)* undef, align 4 + store float 4.0, ptr addrspace(5) undef, align 4 br label %bb12 bb12: @@ -235,7 +235,7 @@ bb1: br i1 %cmp0, label %bb4, label %bb9 bb4: - %load = load volatile i32, i32 addrspace(1)* undef, align 4 + %load = load volatile i32, ptr addrspace(1) undef, align 4 %cmp1 = icmp slt i32 %tmp, %load br i1 %cmp1, label %bb1, label %bb9 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll index 649d9a4d5966d..bb6f809df53f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll @@ -55,7 +55,7 @@ entry: ret void } -define void @returnaddress_debug_loc(i8* addrspace(1)* %ptr) { +define void @returnaddress_debug_loc(ptr addrspace(1) %ptr) { ; CHECK-LABEL: name: returnaddress_debug_loc ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31 @@ -68,12 +68,12 @@ define void @returnaddress_debug_loc(i8* addrspace(1)* %ptr) { ; CHECK-NEXT: GLOBAL_STORE_DWORDX2 [[REG_SEQUENCE]], [[COPY3]], 0, 0, implicit $exec :: (store (p0) into %ir.ptr, addrspace 1) ; CHECK-NEXT: SI_RETURN entry: - %returnaddr = call i8* @llvm.returnaddress(i32 0), !dbg 
!6 - store i8* %returnaddr, i8* addrspace(1)* %ptr, align 8 + %returnaddr = call ptr @llvm.returnaddress(i32 0), !dbg !6 + store ptr %returnaddr, ptr addrspace(1) %ptr, align 8 ret void } -declare i8* @llvm.returnaddress(i32 immarg) #0 +declare ptr @llvm.returnaddress(i32 immarg) #0 attributes #0 = { nofree nosync nounwind readnone willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll index 256b95d222bea..5dae7885f6bfb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll @@ -2,24 +2,24 @@ ; ERR: remark: :0:0: cannot select: %{{[0-9]+}}:sreg_32(p5) = G_DYN_STACKALLOC %{{[0-9]+}}:vgpr(s32), 1 (in function: kernel_dynamic_stackalloc_vgpr_align4) ; ERR-NEXT: warning: Instruction selection used fallback path for kernel_dynamic_stackalloc_vgpr_align4 -; ERR-NEXT: error: :0:0: in function kernel_dynamic_stackalloc_vgpr_align4 void (i32 addrspace(1)*): unsupported dynamic alloca +; ERR-NEXT: error: :0:0: in function kernel_dynamic_stackalloc_vgpr_align4 void (ptr addrspace(1)): unsupported dynamic alloca ; ERR: remark: :0:0: cannot select: %{{[0-9]+}}:sreg_32(p5) = G_DYN_STACKALLOC %{{[0-9]+}}:vgpr(s32), 1 (in function: func_dynamic_stackalloc_vgpr_align4) ; ERR-NEXT: warning: Instruction selection used fallback path for func_dynamic_stackalloc_vgpr_align4 ; ERR-NEXT: error: :0:0: in function func_dynamic_stackalloc_vgpr_align4 void (i32): unsupported dynamic alloca -define amdgpu_kernel void @kernel_dynamic_stackalloc_vgpr_align4(i32 addrspace(1)* %ptr) { +define amdgpu_kernel void @kernel_dynamic_stackalloc_vgpr_align4(ptr addrspace(1) %ptr) { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id - %n = load i32, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %id + %n = load i32, ptr addrspace(1) %gep %alloca = alloca i32, i32 %n, align 4, addrspace(5) - store volatile i32 addrspace(5)* %alloca, i32 addrspace(5)* addrspace(1)* undef + store volatile ptr addrspace(5) %alloca, ptr addrspace(1) undef ret void } define void @func_dynamic_stackalloc_vgpr_align4(i32 %n) { %alloca = alloca i32, i32 %n, align 4, addrspace(5) - store volatile i32 addrspace(5)* %alloca, i32 addrspace(5)* addrspace(1)* undef + store volatile ptr addrspace(5) %alloca, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll index 50ab759f01279..2e3b716750042 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-uniform.ll @@ -57,7 +57,7 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align4(i32 %n) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %alloca = alloca i32, i32 %n, align 4, addrspace(5) - store i32 0, i32 addrspace(5)* %alloca + store i32 0, ptr addrspace(5) %alloca ret void } @@ -139,9 +139,9 @@ define void @func_dynamic_stackalloc_sgpr_align4() { ; GFX11-NEXT: scratch_store_b32 off, v0, s0 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %n = load i32, i32 addrspace(4)* @gv, align 4 + %n = load i32, ptr addrspace(4) @gv, align 4 %alloca = alloca i32, i32 %n, addrspace(5) - store i32 0, i32 addrspace(5)* %alloca + store i32 0, ptr addrspace(5) %alloca ret void } @@ -197,7 +197,7 @@ 
define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align16(i32 %n) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %alloca = alloca i32, i32 %n, align 16, addrspace(5) - store i32 0, i32 addrspace(5)* %alloca + store i32 0, ptr addrspace(5) %alloca ret void } @@ -279,9 +279,9 @@ define void @func_dynamic_stackalloc_sgpr_align16() { ; GFX11-NEXT: scratch_store_b32 off, v0, s0 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %n = load i32, i32 addrspace(4)* @gv, align 16 + %n = load i32, ptr addrspace(4) @gv, align 16 %alloca = alloca i32, i32 %n, addrspace(5) - store i32 0, i32 addrspace(5)* %alloca + store i32 0, ptr addrspace(5) %alloca ret void } @@ -340,11 +340,11 @@ define amdgpu_kernel void @kernel_dynamic_stackalloc_sgpr_align32(i32 %n) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %alloca = alloca i32, i32 %n, align 32, addrspace(5) - store i32 0, i32 addrspace(5)* %alloca + store i32 0, ptr addrspace(5) %alloca ret void } -define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) { +define void @func_dynamic_stackalloc_sgpr_align32(ptr addrspace(1) %out) { ; GFX9-LABEL: func_dynamic_stackalloc_sgpr_align32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -428,8 +428,8 @@ define void @func_dynamic_stackalloc_sgpr_align32(i32 addrspace(1)* %out) { ; GFX11-NEXT: scratch_store_b32 off, v0, s0 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %n = load i32, i32 addrspace(4)* @gv + %n = load i32, ptr addrspace(4) @gv %alloca = alloca i32, i32 %n, align 32, addrspace(5) - store i32 0, i32 addrspace(5)* %alloca + store i32 0, ptr addrspace(5) %alloca ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll index 1e4438526779a..7983cba3478db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -4,7 +4,7 @@ ; Check lowering of some large extractelement that use the stack ; instead of register indexing. 
-define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) { +define i32 @v_extract_v64i32_varidx(ptr addrspace(1) %ptr, i32 %idx) { ; GCN-LABEL: v_extract_v64i32_varidx: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -174,12 +174,12 @@ define i32 @v_extract_v64i32_varidx(<64 x i32> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %vec = load <64 x i32>, ptr addrspace(1) %ptr %elt = extractelement <64 x i32> %vec, i32 %idx ret i32 %elt } -define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { +define i16 @v_extract_v128i16_varidx(ptr addrspace(1) %ptr, i32 %idx) { ; GCN-LABEL: v_extract_v128i16_varidx: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -353,12 +353,12 @@ define i16 @v_extract_v128i16_varidx(<128 x i16> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %vec = load <128 x i16>, <128 x i16> addrspace(1)* %ptr + %vec = load <128 x i16>, ptr addrspace(1) %ptr %elt = extractelement <128 x i16> %vec, i32 %idx ret i16 %elt } -define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) { +define i64 @v_extract_v32i64_varidx(ptr addrspace(1) %ptr, i32 %idx) { ; GCN-LABEL: v_extract_v32i64_varidx: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -529,7 +529,7 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) { ; GCN-NEXT: s_mov_b32 s33, s4 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: s_setpc_b64 s[30:31] - %vec = load <32 x i64>, <32 x i64> addrspace(1)* %ptr + %vec = load <32 x i64>, ptr addrspace(1) %ptr %elt = extractelement <32 x i64> %vec, i32 %idx ret i64 %elt } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll index 093b40114d5ff..5563c5fb41dc6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s -define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(<4 x i128> addrspace(4)* inreg %ptr, i32 inreg %idx) { +define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) { ; GCN-LABEL: extractelement_sgpr_v4i128_sgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx16 s[8:23], s[2:3], 0x0 @@ -32,12 +32,12 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_sgpr_idx(<4 x i128> addrspace( ; GFX11-NEXT: s_movrels_b64 s[0:1], s[8:9] ; GFX11-NEXT: s_movrels_b64 s[2:3], s[10:11] ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 %idx ret i128 %element } -define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(<4 x i128> addrspace(1)* %ptr, i32 inreg %idx) { +define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) { ; GFX9-LABEL: extractelement_vgpr_v4i128_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off @@ 
-149,12 +149,12 @@ define amdgpu_ps i128 @extractelement_vgpr_v4i128_sgpr_idx(<4 x i128> addrspace( ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-NEXT: v_readfirstlane_b32 s2, v2 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %vector = load <4 x i128>, ptr addrspace(1) %ptr %element = extractelement <4 x i128> %vector, i32 %idx ret i128 %element } -define i128 @extractelement_vgpr_v4i128_vgpr_idx(<4 x i128> addrspace(1)* %ptr, i32 %idx) { +define i128 @extractelement_vgpr_v4i128_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX9-LABEL: extractelement_vgpr_v4i128_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -452,12 +452,12 @@ define i128 @extractelement_vgpr_v4i128_vgpr_idx(<4 x i128> addrspace(1)* %ptr, ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX11-NEXT: v_cndmask_b32_e64 v3, v5, v15, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %vector = load <4 x i128>, ptr addrspace(1) %ptr %element = extractelement <4 x i128> %vector, i32 %idx ret i128 %element } -define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(<4 x i128> addrspace(4)* inreg %ptr, i32 %idx) { +define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) { ; GFX9-LABEL: extractelement_sgpr_v4i128_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx16 s[0:15], s[2:3], 0x0 @@ -786,12 +786,12 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_vgpr_idx(<4 x i128> addrspace( ; GFX11-NEXT: v_readfirstlane_b32 s2, v2 ; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 %idx ret i128 %element } -define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx0(<4 x i128> addrspace(4)* inreg %ptr) { +define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx0(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i128_idx0: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx16 s[0:15], s[2:3], 0x0 @@ -809,12 +809,12 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx0(<4 x i128> addrspace(4)* ; GFX11-NEXT: s_load_b512 s[0:15], s[2:3], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 0 ret i128 %element } -define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx1(<4 x i128> addrspace(4)* inreg %ptr) { +define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx1(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i128_idx1: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx16 s[0:15], s[2:3], 0x0 @@ -844,12 +844,12 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx1(<4 x i128> addrspace(4)* ; GFX11-NEXT: s_mov_b32 s2, s6 ; GFX11-NEXT: s_mov_b32 s3, s7 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 1 ret i128 %element } -define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx2(<4 x i128> addrspace(4)* inreg %ptr) { +define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx2(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i128_idx2: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx16 s[0:15], s[2:3], 
0x0 @@ -879,12 +879,12 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx2(<4 x i128> addrspace(4)* ; GFX11-NEXT: s_mov_b32 s2, s10 ; GFX11-NEXT: s_mov_b32 s3, s11 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 2 ret i128 %element } -define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx3(<4 x i128> addrspace(4)* inreg %ptr) { +define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx3(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i128_idx3: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx16 s[0:15], s[2:3], 0x0 @@ -914,12 +914,12 @@ define amdgpu_ps i128 @extractelement_sgpr_v4i128_idx3(<4 x i128> addrspace(4)* ; GFX11-NEXT: s_mov_b32 s2, s14 ; GFX11-NEXT: s_mov_b32 s3, s15 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i128>, <4 x i128> addrspace(4)* %ptr + %vector = load <4 x i128>, ptr addrspace(4) %ptr %element = extractelement <4 x i128> %vector, i32 3 ret i128 %element } -define i128 @extractelement_vgpr_v4i128_idx0(<4 x i128> addrspace(1)* %ptr) { +define i128 @extractelement_vgpr_v4i128_idx0(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i128_idx0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -959,12 +959,12 @@ define i128 @extractelement_vgpr_v4i128_idx0(<4 x i128> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %vector = load <4 x i128>, ptr addrspace(1) %ptr %element = extractelement <4 x i128> %vector, i32 0 ret i128 %element } -define i128 @extractelement_vgpr_v4i128_idx1(<4 x i128> addrspace(1)* %ptr) { +define i128 @extractelement_vgpr_v4i128_idx1(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i128_idx1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1006,12 +1006,12 @@ define i128 @extractelement_vgpr_v4i128_idx1(<4 x i128> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %vector = load <4 x i128>, ptr addrspace(1) %ptr %element = extractelement <4 x i128> %vector, i32 1 ret i128 %element } -define i128 @extractelement_vgpr_v4i128_idx2(<4 x i128> addrspace(1)* %ptr) { +define i128 @extractelement_vgpr_v4i128_idx2(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i128_idx2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1053,12 +1053,12 @@ define i128 @extractelement_vgpr_v4i128_idx2(<4 x i128> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:32 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %vector = load <4 x i128>, ptr addrspace(1) %ptr %element = extractelement <4 x i128> %vector, i32 2 ret i128 %element } -define i128 @extractelement_vgpr_v4i128_idx3(<4 x i128> addrspace(1)* %ptr) { +define i128 @extractelement_vgpr_v4i128_idx3(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i128_idx3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1100,7 +1100,7 @@ define i128 @extractelement_vgpr_v4i128_idx3(<4 x i128> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:48 ; GFX11-NEXT: 
s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i128>, <4 x i128> addrspace(1)* %ptr + %vector = load <4 x i128>, ptr addrspace(1) %ptr %element = extractelement <4 x i128> %vector, i32 3 ret i128 %element } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll index 7b0162aa7d3d4..c0191db08bbff 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i16.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s -define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(<4 x i16> addrspace(4)* inreg %ptr, i32 inreg %idx) { +define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) { ; GCN-LABEL: extractelement_sgpr_v4i16_sgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -43,12 +43,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_sgpr_idx(<4 x i16> addrspace(4)* ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_lshr_b32 s0, s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vector = load <4 x i16>, ptr addrspace(4) %ptr %element = extractelement <4 x i16> %vector, i32 %idx ret i16 %element } -define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(<4 x i16> addrspace(1)* %ptr, i32 inreg %idx) { +define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) { ; GFX9-LABEL: extractelement_vgpr_v4i16_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -115,12 +115,12 @@ define amdgpu_ps i16 @extractelement_vgpr_v4i16_sgpr_idx(<4 x i16> addrspace(1)* ; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vector = load <4 x i16>, ptr addrspace(1) %ptr %element = extractelement <4 x i16> %vector, i32 %idx ret i16 %element } -define i16 @extractelement_vgpr_v4i16_vgpr_idx(<4 x i16> addrspace(1)* %ptr, i32 %idx) { +define i16 @extractelement_vgpr_v4i16_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX9-LABEL: extractelement_vgpr_v4i16_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -188,12 +188,12 @@ define i16 @extractelement_vgpr_v4i16_vgpr_idx(<4 x i16> addrspace(1)* %ptr, i32 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vector = load <4 x i16>, ptr addrspace(1) %ptr %element = extractelement <4 x i16> %vector, i32 %idx ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(<4 x i16> addrspace(4)* inreg %ptr, i32 %idx) { +define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) { ; GCN-LABEL: extractelement_sgpr_v4i16_vgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -238,12 +238,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_vgpr_idx(<4 x i16> addrspace(4)* ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = 
load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vector = load <4 x i16>, ptr addrspace(4) %ptr %element = extractelement <4 x i16> %vector, i32 %idx ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx0(<4 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx0(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i16_idx0: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -261,12 +261,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx0(<4 x i16> addrspace(4)* inr ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vector = load <4 x i16>, ptr addrspace(4) %ptr %element = extractelement <4 x i16> %vector, i32 0 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx1(<4 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx1(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i16_idx1: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -287,12 +287,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx1(<4 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vector = load <4 x i16>, ptr addrspace(4) %ptr %element = extractelement <4 x i16> %vector, i32 1 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx2(<4 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx2(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i16_idx2: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -313,12 +313,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx2(<4 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vector = load <4 x i16>, ptr addrspace(4) %ptr %element = extractelement <4 x i16> %vector, i32 2 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx3(<4 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx3(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i16_idx3: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -339,12 +339,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v4i16_idx3(<4 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s1, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vector = load <4 x i16>, ptr addrspace(4) %ptr %element = extractelement <4 x i16> %vector, i32 3 ret i16 %element } -define i16 @extractelement_vgpr_v4i16_idx0(<4 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v4i16_idx0(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i16_idx0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -381,12 +381,12 @@ define i16 @extractelement_vgpr_v4i16_idx0(<4 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vector = load <4 x i16>, ptr addrspace(1) %ptr %element = extractelement <4 x i16> %vector, i32 0 ret i16 %element } 
-define i16 @extractelement_vgpr_v4i16_idx1(<4 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v4i16_idx1(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i16_idx1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -428,12 +428,12 @@ define i16 @extractelement_vgpr_v4i16_idx1(<4 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vector = load <4 x i16>, ptr addrspace(1) %ptr %element = extractelement <4 x i16> %vector, i32 1 ret i16 %element } -define i16 @extractelement_vgpr_v4i16_idx2(<4 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v4i16_idx2(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i16_idx2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -475,12 +475,12 @@ define i16 @extractelement_vgpr_v4i16_idx2(<4 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vector = load <4 x i16>, ptr addrspace(1) %ptr %element = extractelement <4 x i16> %vector, i32 2 ret i16 %element } -define i16 @extractelement_vgpr_v4i16_idx3(<4 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v4i16_idx3(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i16_idx3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -522,12 +522,12 @@ define i16 @extractelement_vgpr_v4i16_idx3(<4 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vector = load <4 x i16>, ptr addrspace(1) %ptr %element = extractelement <4 x i16> %vector, i32 3 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(<8 x i16> addrspace(4)* inreg %ptr, i32 inreg %idx) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) { ; GCN-LABEL: extractelement_sgpr_v8i16_sgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -577,12 +577,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_sgpr_idx(<8 x i16> addrspace(4)* ; GFX11-NEXT: s_lshl_b32 s1, s1, 4 ; GFX11-NEXT: s_lshr_b32 s0, s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 %idx ret i16 %element } -define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(<8 x i16> addrspace(1)* %ptr, i32 inreg %idx) { +define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) { ; GFX9-LABEL: extractelement_vgpr_v8i16_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off @@ -673,12 +673,12 @@ define amdgpu_ps i16 @extractelement_vgpr_v8i16_sgpr_idx(<8 x i16> addrspace(1)* ; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 %idx ret i16 %element } -define i16 @extractelement_vgpr_v8i16_vgpr_idx(<8 x i16> addrspace(1)* %ptr, i32 %idx) { +define i16 @extractelement_vgpr_v8i16_vgpr_idx(ptr addrspace(1) 
%ptr, i32 %idx) { ; GFX9-LABEL: extractelement_vgpr_v8i16_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -770,12 +770,12 @@ define i16 @extractelement_vgpr_v8i16_vgpr_idx(<8 x i16> addrspace(1)* %ptr, i32 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 4, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 %idx ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(<8 x i16> addrspace(4)* inreg %ptr, i32 %idx) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) { ; GCN-LABEL: extractelement_sgpr_v8i16_vgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -836,12 +836,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_vgpr_idx(<8 x i16> addrspace(4)* ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 %idx ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx0(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx0(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx0: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -859,12 +859,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx0(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 0 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx1(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx1(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx1: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -885,12 +885,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx1(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 1 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx2(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx2(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx2: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -911,12 +911,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx2(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 2 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx3(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx3(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx3: ; GCN: ; 
%bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -937,12 +937,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx3(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s1, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 3 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx4(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx4(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx4: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -963,12 +963,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx4(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s2 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 4 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx5(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx5(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx5: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -989,12 +989,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx5(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s2, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 5 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx6(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx6(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx6: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -1015,12 +1015,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx6(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s3 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 6 ret i16 %element } -define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx7(<8 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx7(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i16_idx7: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -1041,12 +1041,12 @@ define amdgpu_ps i16 @extractelement_sgpr_v8i16_idx7(<8 x i16> addrspace(4)* inr ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s3, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vector = load <8 x i16>, ptr addrspace(4) %ptr %element = extractelement <8 x i16> %vector, i32 7 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx0(<8 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx0(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1086,12 +1086,12 @@ define i16 @extractelement_vgpr_v8i16_idx0(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: 
global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 0 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx1(<8 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx1(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1136,12 +1136,12 @@ define i16 @extractelement_vgpr_v8i16_idx1(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 1 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx2(<8 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx2(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1186,12 +1186,12 @@ define i16 @extractelement_vgpr_v8i16_idx2(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 2 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx3(<8 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx3(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1236,12 +1236,12 @@ define i16 @extractelement_vgpr_v8i16_idx3(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 3 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx4(<8 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx4(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1286,12 +1286,12 @@ define i16 @extractelement_vgpr_v8i16_idx4(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 4 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx5(<8 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx5(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx5: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1336,12 +1336,12 @@ define i16 @extractelement_vgpr_v8i16_idx5(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 5 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx6(<8 x 
i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx6(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx6: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1386,12 +1386,12 @@ define i16 @extractelement_vgpr_v8i16_idx6(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 6 ret i16 %element } -define i16 @extractelement_vgpr_v8i16_idx7(<8 x i16> addrspace(1)* %ptr) { +define i16 @extractelement_vgpr_v8i16_idx7(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i16_idx7: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1436,7 +1436,7 @@ define i16 @extractelement_vgpr_v8i16_idx7(<8 x i16> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vector = load <8 x i16>, ptr addrspace(1) %ptr %element = extractelement <8 x i16> %vector, i32 7 ret i16 %element } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll index 60be5f2e5e30c..e93ff0dc92ceb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i8.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s -define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) { +define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) { ; GCN-LABEL: extractelement_sgpr_v4i8_sgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -33,12 +33,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_sgpr_idx(<4 x i8> addrspace(4)* in ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vector = load <4 x i8>, ptr addrspace(4) %ptr %element = extractelement <4 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 inreg %idx) { +define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) { ; GFX9-LABEL: extractelement_vgpr_v4i8_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v0, v[0:1], off @@ -92,12 +92,12 @@ define amdgpu_ps i8 @extractelement_vgpr_v4i8_sgpr_idx(<4 x i8> addrspace(1)* %p ; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vector = load <4 x i8>, ptr addrspace(1) %ptr %element = extractelement <4 x i8> %vector, i32 %idx ret i8 %element } -define i8 @extractelement_vgpr_v4i8_vgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 %idx) { +define i8 @extractelement_vgpr_v4i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX9-LABEL: extractelement_vgpr_v4i8_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -153,12 +153,12 @@ define 
i8 @extractelement_vgpr_v4i8_vgpr_idx(<4 x i8> addrspace(1)* %ptr, i32 %i ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vector = load <4 x i8>, ptr addrspace(1) %ptr %element = extractelement <4 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* inreg %ptr, i32 %idx) { +define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) { ; GFX9-LABEL: extractelement_sgpr_v4i8_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -210,12 +210,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_vgpr_idx(<4 x i8> addrspace(4)* in ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vector = load <4 x i8>, ptr addrspace(4) %ptr %element = extractelement <4 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(<4 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i8_idx0: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -233,12 +233,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx0(<4 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vector = load <4 x i8>, ptr addrspace(4) %ptr %element = extractelement <4 x i8> %vector, i32 0 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(<4 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i8_idx1: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -259,12 +259,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx1(<4 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 8 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vector = load <4 x i8>, ptr addrspace(4) %ptr %element = extractelement <4 x i8> %vector, i32 1 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(<4 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i8_idx2: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -285,12 +285,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx2(<4 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vector = load <4 x i8>, ptr addrspace(4) %ptr %element = extractelement <4 x i8> %vector, i32 2 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(<4 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v4i8_idx3: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -311,12 +311,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v4i8_idx3(<4 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 24 ; GFX11-NEXT: ; return to shader part 
epilog - %vector = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vector = load <4 x i8>, ptr addrspace(4) %ptr %element = extractelement <4 x i8> %vector, i32 3 ret i8 %element } -define i8 @extractelement_vgpr_v4i8_idx0(<4 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v4i8_idx0(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i8_idx0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -356,12 +356,12 @@ define i8 @extractelement_vgpr_v4i8_idx0(<4 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b32 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vector = load <4 x i8>, ptr addrspace(1) %ptr %element = extractelement <4 x i8> %vector, i32 0 ret i8 %element } -define i8 @extractelement_vgpr_v4i8_idx1(<4 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v4i8_idx1(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i8_idx1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -406,12 +406,12 @@ define i8 @extractelement_vgpr_v4i8_idx1(<4 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vector = load <4 x i8>, ptr addrspace(1) %ptr %element = extractelement <4 x i8> %vector, i32 1 ret i8 %element } -define i8 @extractelement_vgpr_v4i8_idx2(<4 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v4i8_idx2(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i8_idx2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -456,12 +456,12 @@ define i8 @extractelement_vgpr_v4i8_idx2(<4 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vector = load <4 x i8>, ptr addrspace(1) %ptr %element = extractelement <4 x i8> %vector, i32 2 ret i8 %element } -define i8 @extractelement_vgpr_v4i8_idx3(<4 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v4i8_idx3(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v4i8_idx3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -506,12 +506,12 @@ define i8 @extractelement_vgpr_v4i8_idx3(<4 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vector = load <4 x i8>, ptr addrspace(1) %ptr %element = extractelement <4 x i8> %vector, i32 3 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) { ; GCN-LABEL: extractelement_sgpr_v8i8_sgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -549,12 +549,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_sgpr_idx(<8 x i8> addrspace(4)* in ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_lshr_b32 s0, s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 inreg %idx) { +define 
amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) { ; GFX9-LABEL: extractelement_vgpr_v8i8_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -624,12 +624,12 @@ define amdgpu_ps i8 @extractelement_vgpr_v8i8_sgpr_idx(<8 x i8> addrspace(1)* %p ; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 %idx ret i8 %element } -define i8 @extractelement_vgpr_v8i8_vgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 %idx) { +define i8 @extractelement_vgpr_v8i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX9-LABEL: extractelement_vgpr_v8i8_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -700,12 +700,12 @@ define i8 @extractelement_vgpr_v8i8_vgpr_idx(<8 x i8> addrspace(1)* %ptr, i32 %i ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(<8 x i8> addrspace(4)* inreg %ptr, i32 %idx) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) { ; GCN-LABEL: extractelement_sgpr_v8i8_vgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -750,12 +750,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_vgpr_idx(<8 x i8> addrspace(4)* in ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i8_idx0: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -773,12 +773,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx0(<8 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 0 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i8_idx1: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -799,12 +799,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx1(<8 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 8 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 1 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(ptr addrspace(4) inreg %ptr) { ; 
GCN-LABEL: extractelement_sgpr_v8i8_idx2: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -825,12 +825,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx2(<8 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 2 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i8_idx3: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -851,12 +851,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx3(<8 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s0, 24 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 3 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i8_idx4: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -877,12 +877,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx4(<8 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_mov_b32 s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 4 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i8_idx5: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -903,12 +903,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx5(<8 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s1, 8 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 5 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i8_idx6: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -929,12 +929,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx6(<8 x i8> addrspace(4)* inreg ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s1, 16 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 6 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(<8 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: extractelement_sgpr_v8i8_idx7: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -955,12 +955,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v8i8_idx7(<8 x i8> addrspace(4)* inreg ; 
GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_lshr_b32 s0, s1, 24 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vector = load <8 x i8>, ptr addrspace(4) %ptr %element = extractelement <8 x i8> %vector, i32 7 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx0(<8 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v8i8_idx0(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1000,12 +1000,12 @@ define i8 @extractelement_vgpr_v8i8_idx0(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b64 v[0:1], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 0 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx1(<8 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v8i8_idx1(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1050,12 +1050,12 @@ define i8 @extractelement_vgpr_v8i8_idx1(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 1 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx2(<8 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v8i8_idx2(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1100,12 +1100,12 @@ define i8 @extractelement_vgpr_v8i8_idx2(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 2 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx3(<8 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v8i8_idx3(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1150,12 +1150,12 @@ define i8 @extractelement_vgpr_v8i8_idx3(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 3 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx4(<8 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v8i8_idx4(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1200,12 +1200,12 @@ define i8 @extractelement_vgpr_v8i8_idx4(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 4 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx5(<8 x i8> addrspace(1)* %ptr) { +define i8 
@extractelement_vgpr_v8i8_idx5(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx5: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1250,12 +1250,12 @@ define i8 @extractelement_vgpr_v8i8_idx5(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 5 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx6(<8 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v8i8_idx6(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx6: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1300,12 +1300,12 @@ define i8 @extractelement_vgpr_v8i8_idx6(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 6 ret i8 %element } -define i8 @extractelement_vgpr_v8i8_idx7(<8 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v8i8_idx7(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v8i8_idx7: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1350,12 +1350,12 @@ define i8 @extractelement_vgpr_v8i8_idx7(<8 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vector = load <8 x i8>, ptr addrspace(1) %ptr %element = extractelement <8 x i8> %vector, i32 7 ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 inreg %idx) { +define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(ptr addrspace(4) inreg %ptr, i32 inreg %idx) { ; GCN-LABEL: extractelement_sgpr_v16i8_sgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -1405,12 +1405,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v16i8_sgpr_idx(<16 x i8> addrspace(4)* ; GFX11-NEXT: s_lshl_b32 s1, s1, 3 ; GFX11-NEXT: s_lshr_b32 s0, s0, s1 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr + %vector = load <16 x i8>, ptr addrspace(4) %ptr %element = extractelement <16 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 inreg %idx) { +define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(ptr addrspace(1) %ptr, i32 inreg %idx) { ; GFX9-LABEL: extractelement_vgpr_v16i8_sgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off @@ -1501,12 +1501,12 @@ define amdgpu_ps i8 @extractelement_vgpr_v16i8_sgpr_idx(<16 x i8> addrspace(1)* ; GFX11-NEXT: v_lshrrev_b32_e32 v0, s0, v0 ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 %idx ret i8 %element } -define i8 @extractelement_vgpr_v16i8_vgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 %idx) { +define i8 @extractelement_vgpr_v16i8_vgpr_idx(ptr addrspace(1) %ptr, i32 %idx) { ; GFX9-LABEL: extractelement_vgpr_v16i8_vgpr_idx: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) @@ -1598,12 +1598,12 @@ define i8 @extractelement_vgpr_v16i8_vgpr_idx(<16 x i8> addrspace(1)* %ptr, i32 ; GFX11-NEXT: v_lshlrev_b32_e32 v1, 3, v2 ; GFX11-NEXT: v_lshrrev_b32_e32 v0, v1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 %idx ret i8 %element } -define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(<16 x i8> addrspace(4)* inreg %ptr, i32 %idx) { +define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(ptr addrspace(4) inreg %ptr, i32 %idx) { ; GCN-LABEL: extractelement_sgpr_v16i8_vgpr_idx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -1664,12 +1664,12 @@ define amdgpu_ps i8 @extractelement_sgpr_v16i8_vgpr_idx(<16 x i8> addrspace(4)* ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: ; return to shader part epilog - %vector = load <16 x i8>, <16 x i8> addrspace(4)* %ptr + %vector = load <16 x i8>, ptr addrspace(4) %ptr %element = extractelement <16 x i8> %vector, i32 %idx ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx0(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx0(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx0: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1709,12 +1709,12 @@ define i8 @extractelement_vgpr_v16i8_idx0(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 0 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx1(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx1(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1759,12 +1759,12 @@ define i8 @extractelement_vgpr_v16i8_idx1(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 1 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx2(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx2(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1809,12 +1809,12 @@ define i8 @extractelement_vgpr_v16i8_idx2(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 2 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx3(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx3(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx3: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1859,12 +1859,12 @@ define i8 @extractelement_vgpr_v16i8_idx3(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v0 ; GFX11-NEXT: s_setpc_b64 
s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 3 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx4(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx4(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1909,12 +1909,12 @@ define i8 @extractelement_vgpr_v16i8_idx4(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 4 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx5(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx5(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx5: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1959,12 +1959,12 @@ define i8 @extractelement_vgpr_v16i8_idx5(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 5 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx6(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx6(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx6: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2009,12 +2009,12 @@ define i8 @extractelement_vgpr_v16i8_idx6(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 6 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx7(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx7(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx7: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2059,12 +2059,12 @@ define i8 @extractelement_vgpr_v16i8_idx7(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 7 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx8(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx8(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2109,12 +2109,12 @@ define i8 @extractelement_vgpr_v16i8_idx8(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 8 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx9(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx9(ptr addrspace(1) %ptr) { ; GFX9-LABEL: 
extractelement_vgpr_v16i8_idx9: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2159,12 +2159,12 @@ define i8 @extractelement_vgpr_v16i8_idx9(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 9 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx10(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx10(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx10: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2209,12 +2209,12 @@ define i8 @extractelement_vgpr_v16i8_idx10(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 10 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx11(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx11(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx11: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2259,12 +2259,12 @@ define i8 @extractelement_vgpr_v16i8_idx11(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 11 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx12(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx12(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx12: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2309,12 +2309,12 @@ define i8 @extractelement_vgpr_v16i8_idx12(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 12 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx13(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx13(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx13: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2359,12 +2359,12 @@ define i8 @extractelement_vgpr_v16i8_idx13(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 8, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 13 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx14(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx14(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx14: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2409,12 +2409,12 @@ define i8 @extractelement_vgpr_v16i8_idx14(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector 
= load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 14 ret i8 %element } -define i8 @extractelement_vgpr_v16i8_idx15(<16 x i8> addrspace(1)* %ptr) { +define i8 @extractelement_vgpr_v16i8_idx15(ptr addrspace(1) %ptr) { ; GFX9-LABEL: extractelement_vgpr_v16i8_idx15: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -2459,7 +2459,7 @@ define i8 @extractelement_vgpr_v16i8_idx15(<16 x i8> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vector = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vector = load <16 x i8>, ptr addrspace(1) %ptr %element = extractelement <16 x i8> %vector, i32 15 ret i8 %element } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll index 79ca174983420..e5c1d645d2cb9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll @@ -500,7 +500,7 @@ define amdgpu_ps void @dyn_extract_v8i64_const_s_s(i32 inreg %sel) { ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <8 x i64> , i32 %sel - store i64 %ext, i64 addrspace(1)* undef + store i64 %ext, ptr addrspace(1) undef ret void } @@ -702,7 +702,7 @@ define amdgpu_ps void @dyn_extract_v8i64_s_v(<8 x i64> inreg %vec, i32 %sel) { ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <8 x i64> %vec, i32 %sel - store i64 %ext, i64 addrspace(1)* undef + store i64 %ext, ptr addrspace(1) undef ret void } @@ -821,7 +821,7 @@ define amdgpu_ps void @dyn_extract_v8i64_v_s(<8 x i64> %vec, i32 inreg %sel) { ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <8 x i64> %vec, i32 %sel - store i64 %ext, i64 addrspace(1)* undef + store i64 %ext, ptr addrspace(1) undef ret void } @@ -927,7 +927,7 @@ define amdgpu_ps void @dyn_extract_v8i64_s_s(<8 x i64> inreg %vec, i32 inreg %se ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <8 x i64> %vec, i32 %sel - store i64 %ext, i64 addrspace(1)* undef + store i64 %ext, ptr addrspace(1) undef ret void } @@ -1586,7 +1586,7 @@ entry: ret double %ext } -define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 %idx) { +define ptr addrspace(3) @dyn_extract_v8p3_v_v(<8 x ptr addrspace(3)> %vec, i32 %idx) { ; GCN-LABEL: dyn_extract_v8p3_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1626,11 +1626,11 @@ define i8 addrspace(3)* @dyn_extract_v8p3_v_v(<8 x i8 addrspace(3)*> %vec, i32 % ; GFX10PLUS-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc_lo ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] entry: - %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx - ret i8 addrspace(3)* %ext + %ext = extractelement <8 x ptr addrspace(3)> %vec, i32 %idx + ret ptr addrspace(3) %ext } -define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i32 inreg %idx) { +define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x ptr addrspace(3)> inreg %vec, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_extract_v8p3_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 1 @@ -1700,12 +1700,12 @@ define amdgpu_ps void @dyn_extract_v8p3_s_s(<8 x i8 addrspace(3)*> inreg %vec, i ; GFX11-NEXT: ds_store_b32 v0, v0 ; GFX11-NEXT: s_endpgm entry: - %ext = extractelement <8 x i8 addrspace(3)*> %vec, i32 %idx - store i8 addrspace(3)* %ext, i8 addrspace(3)* addrspace(3)* undef + %ext = extractelement <8 x ptr addrspace(3)> %vec, i32 %idx + 
store ptr addrspace(3) %ext, ptr addrspace(3) undef ret void } -define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 %idx) { +define ptr addrspace(1) @dyn_extract_v8p1_v_v(<8 x ptr addrspace(1)> %vec, i32 %idx) { ; GCN-LABEL: dyn_extract_v8p1_v_v: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1779,11 +1779,11 @@ define i8 addrspace(1)* @dyn_extract_v8p1_v_v(<8 x i8 addrspace(1)*> %vec, i32 % ; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v14 :: v_dual_cndmask_b32 v1, v1, v15 ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: - %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx - ret i8 addrspace(1)* %ext + %ext = extractelement <8 x ptr addrspace(1)> %vec, i32 %idx + ret ptr addrspace(1) %ext } -define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i32 inreg %idx) { +define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x ptr addrspace(1)> inreg %vec, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_extract_v8p1_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -1884,8 +1884,8 @@ define amdgpu_ps void @dyn_extract_v8p1_s_s(<8 x i8 addrspace(1)*> inreg %vec, i ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm entry: - %ext = extractelement <8 x i8 addrspace(1)*> %vec, i32 %idx - store i8 addrspace(1)* %ext, i8 addrspace(1)* addrspace(1)* undef + %ext = extractelement <8 x ptr addrspace(1)> %vec, i32 %idx + store ptr addrspace(1) %ext, ptr addrspace(1) undef ret void } @@ -3036,7 +3036,7 @@ entry: ret double %ext } -define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32 %sel) { +define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v5f64_s_s: ; GPRIDX: .amd_kernel_code_t ; GPRIDX-NEXT: amd_code_version_major = 1 @@ -3405,7 +3405,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(double addrspace(1)* %out, i32 ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <5 x double> , i32 %sel - store double %ext, double addrspace(1)* %out + store double %ext, ptr addrspace(1) %out ret void } @@ -3957,7 +3957,7 @@ entry: ret float %ext } -define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(float addrspace(1)* %out, i32 %sel) { +define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v4f32_s_s_s: ; GPRIDX: .amd_kernel_code_t ; GPRIDX-NEXT: amd_code_version_major = 1 @@ -4299,11 +4299,11 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(float addrspace(1)* %out, i32 ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <4 x float> , i32 %sel - store float %ext, float addrspace(1)* %out + store float %ext, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(double addrspace(1)* %out, i32 %sel) { +define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %sel) { ; GPRIDX-LABEL: dyn_extract_v4f64_s_s_s: ; GPRIDX: .amd_kernel_code_t ; GPRIDX-NEXT: amd_code_version_major = 1 @@ -4656,11 +4656,11 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(double addrspace(1)* %out, i3 ; GFX11-NEXT: s_endpgm entry: %ext = extractelement <4 x double> , i32 %sel - store double %ext, double addrspace(1)* %out + store double %ext, ptr addrspace(1) %out ret void } -define i32 @v_extract_v64i32_7(<64 x i32> addrspace(1)* %ptr) { +define i32 @v_extract_v64i32_7(ptr addrspace(1) %ptr) { ; GPRIDX-LABEL: v_extract_v64i32_7: ; GPRIDX: ; %bb.0: ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4696,12 +4696,12 @@ define 
i32 @v_extract_v64i32_7(<64 x i32> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v7 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %vec = load <64 x i32>, ptr addrspace(1) %ptr %elt = extractelement <64 x i32> %vec, i32 7 ret i32 %elt } -define i32 @v_extract_v64i32_32(<64 x i32> addrspace(1)* %ptr) { +define i32 @v_extract_v64i32_32(ptr addrspace(1) %ptr) { ; GPRIDX-LABEL: v_extract_v64i32_32: ; GPRIDX: ; %bb.0: ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4736,12 +4736,12 @@ define i32 @v_extract_v64i32_32(<64 x i32> addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_b128 v[0:3], v[0:1], off offset:128 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %vec = load <64 x i32>, ptr addrspace(1) %ptr %elt = extractelement <64 x i32> %vec, i32 32 ret i32 %elt } -define i32 @v_extract_v64i32_33(<64 x i32> addrspace(1)* %ptr) { +define i32 @v_extract_v64i32_33(ptr addrspace(1) %ptr) { ; GPRIDX-LABEL: v_extract_v64i32_33: ; GPRIDX: ; %bb.0: ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4780,12 +4780,12 @@ define i32 @v_extract_v64i32_33(<64 x i32> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v1 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %vec = load <64 x i32>, ptr addrspace(1) %ptr %elt = extractelement <64 x i32> %vec, i32 33 ret i32 %elt } -define i32 @v_extract_v64i32_37(<64 x i32> addrspace(1)* %ptr) { +define i32 @v_extract_v64i32_37(ptr addrspace(1) %ptr) { ; GPRIDX-LABEL: v_extract_v64i32_37: ; GPRIDX: ; %bb.0: ; GPRIDX-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -4821,7 +4821,7 @@ define i32 @v_extract_v64i32_37(<64 x i32> addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %vec = load <64 x i32>, ptr addrspace(1) %ptr %elt = extractelement <64 x i32> %vec, i32 37 ret i32 %elt } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll index d71b58f484a42..3f93141252730 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX940 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s -define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(float* %ptr, float %data) { +define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %data) { ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -23,11 +23,11 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(float* %ptr, float ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) ; GFX11-NEXT: S_ENDPGM 0 - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float* %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr 
%ptr, float %data) ret void } -define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(float* %ptr, float %data) { +define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data) { ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -50,11 +50,11 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(float* %ptr, float %d ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s32) on %ir.ptr) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float* %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data) ret float %ret } -define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(float* %ptr, float %data) #0 { +define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %data) #0 { ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -75,11 +75,11 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(float* %ptr, float ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) ; GFX11-NEXT: S_ENDPGM 0 - %ret = atomicrmw fadd float* %ptr, float %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr %ptr, float %data syncscope("wavefront") monotonic ret void } -define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(float* %ptr, float %data) #0 { +define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data) #0 { ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -102,10 +102,10 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(float* %ptr, float %d ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr) ; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = atomicrmw fadd float* %ptr, float %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr %ptr, float %data syncscope("wavefront") monotonic ret float %ret } -declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float*, float) +declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr, float) attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll index 4ae1c5647e690..620b1da346762 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -define amdgpu_ps void 
@flat_atomic_fadd_f64_no_rtn_intrinsic(double* %ptr, double %data) { +define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_intrinsic(ptr %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -15,11 +15,11 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_intrinsic(double* %ptr, doubl ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (s64) on %ir.ptr) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data) ret void } -define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(double* %ptr, double %data) { +define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(ptr %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -38,11 +38,11 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_intrinsic(double* %ptr, double ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr %ptr, double %data) ret double %ret } -define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(double* %ptr, double %data) #0 { +define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %data) #0 { ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_no_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -55,11 +55,11 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(double* %ptr, doubl ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = atomicrmw fadd double* %ptr, double %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic ret void } -define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(double* %ptr, double %data) #0 { +define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %data) #0 { ; GFX90A_GFX940-LABEL: name: flat_atomic_fadd_f64_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -78,10 +78,10 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(double* %ptr, double ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = atomicrmw fadd double* %ptr, 
double %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic ret double %ret } -declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double*, double) +declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr, double) attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll index 94c8fd34926e5..d50ce5e7ecda3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX940 %s -define amdgpu_ps void @flat_atomic_fadd_v2f16_no_rtn_intrinsic(<2 x half>* %ptr, <2 x half> %data) { +define amdgpu_ps void @flat_atomic_fadd_v2f16_no_rtn_intrinsic(ptr %ptr, <2 x half> %data) { ; GFX940-LABEL: name: flat_atomic_fadd_v2f16_no_rtn_intrinsic ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -12,11 +12,11 @@ define amdgpu_ps void @flat_atomic_fadd_v2f16_no_rtn_intrinsic(<2 x half>* %ptr, ; GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX940-NEXT: FLAT_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr) ; GFX940-NEXT: S_ENDPGM 0 - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr %ptr, <2 x half> %data) ret void } -define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn_intrinsic(<2 x half>* %ptr, <2 x half> %data) { +define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn_intrinsic(ptr %ptr, <2 x half> %data) { ; GFX940-LABEL: name: flat_atomic_fadd_v2f16_rtn_intrinsic ; GFX940: bb.1 (%ir-block.0): ; GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -28,8 +28,8 @@ define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn_intrinsic(<2 x half>* %p ; GFX940-NEXT: [[FLAT_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr) ; GFX940-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_PK_ADD_F16_RTN]] ; GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr %ptr, <2 x half> %data) ret <2 x half> %ret } -declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half>*, <2 x half>) +declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr, <2 x half>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll index 8bc4111abe1a5..0edc177a8be5d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll @@ -38,8 +38,8 @@ define amdgpu_ps void @amdgpu_ps() { ; PAL-NEXT: s_waitcnt vmcnt(0) ; PAL-NEXT: s_endpgm %alloca = alloca i32, addrspace(5) - %cast = addrspacecast i32 addrspace(5)* %alloca to i32* - store volatile i32 0, i32* %cast + %cast = 
addrspacecast ptr addrspace(5) %alloca to ptr + store volatile i32 0, ptr %cast ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll index 5914521cf4189..a8aa6c780b86a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll @@ -22,8 +22,8 @@ ; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0 define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() { %alloca = alloca i32, addrspace(5) - %cast = addrspacecast i32 addrspace(5)* %alloca to i32* - store volatile i32 0, i32* %cast + %cast = addrspacecast ptr addrspace(5) %alloca to ptr + store volatile i32 0, ptr %cast ret void } @@ -47,7 +47,7 @@ define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() { ; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0 define amdgpu_kernel void @stack_object_in_kernel_no_calls() { %alloca = alloca i32, addrspace(5) - store volatile i32 0, i32 addrspace(5)* %alloca + store volatile i32 0, ptr addrspace(5) %alloca ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll index 7d0029e9efa56..1369b02d87fe1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -77,14 +77,11 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) { ; GFX11-NEXT: s_endpgm bb: %i = alloca [32 x float], align 4, addrspace(5) - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = and i32 %idx, 15 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -148,16 +145,13 @@ define amdgpu_kernel void @store_load_vindex_kernel() { ; GFX11-NEXT: s_endpgm bb: %i = alloca [32 x float], align 4, addrspace(5) - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* %i2 = tail call i32 @llvm.amdgcn.workitem.id.x() %i3 = zext i32 %i2 to i64 - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i2 - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i2 + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = sub nsw i32 31, %i2 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -221,18 +215,15 @@ define void @store_load_vindex_foo(i32 %idx) { ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: %i = alloca [32 x float], align 4, addrspace(5) - %i1 = bitcast [32 x float] 
addrspace(5)* %i to i8 addrspace(5)* - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = and i32 %idx, 15 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } -define void @private_ptr_foo(float addrspace(5)* nocapture %arg) { +define void @private_ptr_foo(ptr addrspace(5) nocapture %arg) { ; GFX9-LABEL: private_ptr_foo: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -266,8 +257,8 @@ define void @private_ptr_foo(float addrspace(5)* nocapture %arg) { ; GFX11-NEXT: scratch_store_b32 v0, v1, off offset:4 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr inbounds float, float addrspace(5)* %arg, i32 1 - store float 1.000000e+01, float addrspace(5)* %gep, align 4 + %gep = getelementptr inbounds float, ptr addrspace(5) %arg, i32 1 + store float 1.000000e+01, ptr addrspace(5) %gep, align 4 ret void } @@ -353,16 +344,13 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) { bb: %padding = alloca [64 x i32], align 4, addrspace(5) %i = alloca [32 x float], align 4, addrspace(5) - %pad_gep = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %padding, i32 0, i32 undef - %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef + %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = and i32 %idx, 15 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -436,18 +424,15 @@ define amdgpu_kernel void @store_load_vindex_small_offset_kernel() { bb: %padding = alloca [64 x i32], align 4, addrspace(5) %i = alloca [32 x float], align 4, addrspace(5) - %pad_gep = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %padding, i32 0, i32 undef - %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* + %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef + %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 %i2 = tail call i32 @llvm.amdgcn.workitem.id.x() %i3 = zext i32 %i2 to i64 - %i7 = getelementptr inbounds [32 x float], [32 x float] 
addrspace(5)* %i, i32 0, i32 %i2 - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i2 + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = sub nsw i32 31, %i2 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -523,16 +508,13 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) { bb: %padding = alloca [64 x i32], align 4, addrspace(5) %i = alloca [32 x float], align 4, addrspace(5) - %pad_gep = getelementptr inbounds [64 x i32], [64 x i32] addrspace(5)* %padding, i32 0, i32 undef - %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef + %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = and i32 %idx, 15 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -622,16 +604,13 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) { bb: %padding = alloca [4096 x i32], align 4, addrspace(5) %i = alloca [32 x float], align 4, addrspace(5) - %pad_gep = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %padding, i32 0, i32 undef - %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef + %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = and i32 %idx, 15 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -709,18 +688,15 @@ define amdgpu_kernel void @store_load_vindex_large_offset_kernel() { bb: %padding = alloca [4096 x i32], align 4, addrspace(5) %i = alloca [32 x float], align 4, addrspace(5) - %pad_gep = 
getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %padding, i32 0, i32 undef - %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* + %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef + %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 %i2 = tail call i32 @llvm.amdgcn.workitem.id.x() %i3 = zext i32 %i2 to i64 - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i2 - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i2 + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = sub nsw i32 31, %i2 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -801,16 +777,13 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) { bb: %padding = alloca [4096 x i32], align 4, addrspace(5) %i = alloca [32 x float], align 4, addrspace(5) - %pad_gep = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %padding, i32 0, i32 undef - %pad_load = load volatile i32, i32 addrspace(5)* %pad_gep, align 4 - %i1 = bitcast [32 x float] addrspace(5)* %i to i8 addrspace(5)* - %i7 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %idx - %i8 = bitcast float addrspace(5)* %i7 to i32 addrspace(5)* - store volatile i32 15, i32 addrspace(5)* %i8, align 4 + %pad_gep = getelementptr inbounds [4096 x i32], ptr addrspace(5) %padding, i32 0, i32 undef + %pad_load = load volatile i32, ptr addrspace(5) %pad_gep, align 4 + %i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx + store volatile i32 15, ptr addrspace(5) %i7, align 4 %i9 = and i32 %idx, 15 - %i10 = getelementptr inbounds [32 x float], [32 x float] addrspace(5)* %i, i32 0, i32 %i9 - %i11 = bitcast float addrspace(5)* %i10 to i32 addrspace(5)* - %i12 = load volatile i32, i32 addrspace(5)* %i11, align 4 + %i10 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %i9 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -876,12 +849,12 @@ define amdgpu_kernel void @store_load_large_imm_offset_kernel() { ; GFX11-NEXT: s_endpgm bb: %i = alloca [4096 x i32], align 4, addrspace(5) - %i1 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 undef - store volatile i32 13, i32 addrspace(5)* %i1, align 4 - %i7 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 4000 - store volatile i32 15, i32 addrspace(5)* %i7, align 4 - %i10 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 4000 - %i12 = load volatile i32, i32 addrspace(5)* %i10, align 4 + %i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 undef + store volatile i32 13, ptr addrspace(5) %i1, align 4 + %i7 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000 + store volatile i32 15, ptr addrspace(5) %i7, align 4 + %i10 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 
ret void } @@ -948,12 +921,12 @@ define void @store_load_large_imm_offset_foo() { ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: %i = alloca [4096 x i32], align 4, addrspace(5) - %i1 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 undef - store volatile i32 13, i32 addrspace(5)* %i1, align 4 - %i7 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 4000 - store volatile i32 15, i32 addrspace(5)* %i7, align 4 - %i10 = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(5)* %i, i32 0, i32 4000 - %i12 = load volatile i32, i32 addrspace(5)* %i10, align 4 + %i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 undef + store volatile i32 13, ptr addrspace(5) %i1, align 4 + %i7 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000 + store volatile i32 15, ptr addrspace(5) %i7, align 4 + %i10 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 4000 + %i12 = load volatile i32, ptr addrspace(5) %i10, align 4 ret void } @@ -1018,13 +991,13 @@ bb: %vidx = tail call i32 @llvm.amdgcn.workitem.id.x() %add1 = add nsw i32 %sidx, %vidx %add2 = add nsw i32 %add1, 256 - %gep = getelementptr inbounds [32 x i32], [32 x i32] addrspace(5)* %alloca, i32 0, i32 %add2 - store volatile i32 15, i32 addrspace(5)* %gep, align 4 - %load = load volatile i32, i32 addrspace(5)* %gep, align 4 + %gep = getelementptr inbounds [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %add2 + store volatile i32 15, ptr addrspace(5) %gep, align 4 + %load = load volatile i32, ptr addrspace(5) %gep, align 4 ret void } -define void @store_load_i64_aligned(i64 addrspace(5)* nocapture %arg) { +define void @store_load_i64_aligned(ptr addrspace(5) nocapture %arg) { ; GFX9-LABEL: store_load_i64_aligned: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1070,12 +1043,12 @@ define void @store_load_i64_aligned(i64 addrspace(5)* nocapture %arg) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - store volatile i64 15, i64 addrspace(5)* %arg, align 8 - %load = load volatile i64, i64 addrspace(5)* %arg, align 8 + store volatile i64 15, ptr addrspace(5) %arg, align 8 + %load = load volatile i64, ptr addrspace(5) %arg, align 8 ret void } -define void @store_load_i64_unaligned(i64 addrspace(5)* nocapture %arg) { +define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) { ; GFX9-LABEL: store_load_i64_unaligned: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1121,12 +1094,12 @@ define void @store_load_i64_unaligned(i64 addrspace(5)* nocapture %arg) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - store volatile i64 15, i64 addrspace(5)* %arg, align 1 - %load = load volatile i64, i64 addrspace(5)* %arg, align 1 + store volatile i64 15, ptr addrspace(5) %arg, align 1 + %load = load volatile i64, ptr addrspace(5) %arg, align 1 ret void } -define void @store_load_v3i32_unaligned(<3 x i32> addrspace(5)* nocapture %arg) { +define void @store_load_v3i32_unaligned(ptr addrspace(5) nocapture %arg) { ; GFX9-LABEL: store_load_v3i32_unaligned: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1188,12 +1161,12 @@ define void @store_load_v3i32_unaligned(<3 x i32> addrspace(5)* nocapture %arg) ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - store volatile <3 x i32> , <3 x i32> addrspace(5)* %arg, align 1 - %load = load volatile <3 x i32>, <3 x i32> addrspace(5)* %arg, 
align 1 + store volatile <3 x i32> , ptr addrspace(5) %arg, align 1 + %load = load volatile <3 x i32>, ptr addrspace(5) %arg, align 1 ret void } -define void @store_load_v4i32_unaligned(<4 x i32> addrspace(5)* nocapture %arg) { +define void @store_load_v4i32_unaligned(ptr addrspace(5) nocapture %arg) { ; GFX9-LABEL: store_load_v4i32_unaligned: ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -1260,8 +1233,8 @@ define void @store_load_v4i32_unaligned(<4 x i32> addrspace(5)* nocapture %arg) ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] bb: - store volatile <4 x i32> , <4 x i32> addrspace(5)* %arg, align 1 - %load = load volatile <4 x i32>, <4 x i32> addrspace(5)* %arg, align 1 + store volatile <4 x i32> , ptr addrspace(5) %arg, align 1 + %load = load volatile <4 x i32>, ptr addrspace(5) %arg, align 1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll index 6ac4dc886db07..3d5f908c8667f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmax_legacy.ll @@ -229,7 +229,7 @@ define float @v_test_fmax_legacy_ule_f32_multi_use(float %a, float %b) { %cmp = fcmp ogt float %a, %b %val0 = select i1 %cmp, float %a, float %b %val1 = zext i1 %cmp to i32 - store i32 %val1, i32 addrspace(3)* undef + store i32 %val1, ptr addrspace(3) undef ret float %val0 } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll index 92961ab1c4dda..0f207945062a6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX11 %s -define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { +define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { ; SI-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod0: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -111,23 +111,23 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod0(float addrs ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid - %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid - %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid - %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load volatile float, float addrspace(1)* %gep0 - %b = load volatile float, float addrspace(1)* %gep1 - %c = load volatile float, float addrspace(1)* %gep2 + %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid + %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid + %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid + %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %a = load volatile float, ptr addrspace(1) %gep0 + %b = load volatile float, ptr addrspace(1) %gep1 + %c = load volatile float, ptr addrspace(1) %gep2 %a.fneg = fsub float -0.0, %a %tmp0 = call float 
@llvm.minnum.f32(float %a.fneg, float %b) %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b) %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) - store float %med3, float addrspace(1)* %outgep + store float %med3, ptr addrspace(1) %outgep ret void } -define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 { +define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { ; SI-LABEL: v_test_no_global_nnans_med3_f32_pat0_srcmod0: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -256,23 +256,23 @@ define amdgpu_kernel void @v_test_no_global_nnans_med3_f32_pat0_srcmod0(float ad ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid - %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid - %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid - %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load volatile float, float addrspace(1)* %gep0 - %b = load volatile float, float addrspace(1)* %gep1 - %c = load volatile float, float addrspace(1)* %gep2 + %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid + %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid + %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid + %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %a = load volatile float, ptr addrspace(1) %gep0 + %b = load volatile float, ptr addrspace(1) %gep1 + %c = load volatile float, ptr addrspace(1) %gep2 %a.fneg = fsub float -0.0, %a %tmp0 = call float @llvm.minnum.f32(float %a.fneg, float %b) %tmp1 = call float @llvm.maxnum.f32(float %a.fneg, float %b) %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) - store float %med3, float addrspace(1)* %outgep + store float %med3, ptr addrspace(1) %outgep ret void } -define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { +define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { ; SI-LABEL: v_test_global_nnans_med3_f32_pat0_srcmod012: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -383,13 +383,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float add ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid - %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid - %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid - %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load volatile float, float addrspace(1)* %gep0 - %b = load volatile float, float addrspace(1)* %gep1 - %c = load volatile float, float addrspace(1)* %gep2 + %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid + %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid + %gep2 = getelementptr float, ptr addrspace(1) 
%cptr, i32 %tid + %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %a = load volatile float, ptr addrspace(1) %gep0 + %b = load volatile float, ptr addrspace(1) %gep1 + %c = load volatile float, ptr addrspace(1) %gep2 %a.fneg = fsub float -0.0, %a %b.fabs = call float @llvm.fabs.f32(float %b) @@ -401,11 +401,11 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_srcmod012(float add %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg) %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) - store float %med3, float addrspace(1)* %outgep + store float %med3, ptr addrspace(1) %outgep ret void } -define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #2 { +define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #2 { ; SI-LABEL: v_test_global_nnans_med3_f32_pat0_negabs012: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -521,13 +521,13 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float add ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid - %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid - %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid - %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load volatile float, float addrspace(1)* %gep0 - %b = load volatile float, float addrspace(1)* %gep1 - %c = load volatile float, float addrspace(1)* %gep2 + %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid + %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid + %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid + %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %a = load volatile float, ptr addrspace(1) %gep0 + %b = load volatile float, ptr addrspace(1) %gep1 + %c = load volatile float, ptr addrspace(1) %gep2 %a.fabs = call float @llvm.fabs.f32(float %a) %a.fabs.fneg = fsub float -0.0, %a.fabs @@ -541,11 +541,11 @@ define amdgpu_kernel void @v_test_global_nnans_med3_f32_pat0_negabs012(float add %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.fabs.fneg) %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) - store float %med3, float addrspace(1)* %outgep + store float %med3, ptr addrspace(1) %outgep ret void } -define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 { +define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { ; SI-LABEL: v_nnan_inputs_med3_f32_pat0: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -660,13 +660,13 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid - %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid - %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid - %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load volatile 
float, float addrspace(1)* %gep0 - %b = load volatile float, float addrspace(1)* %gep1 - %c = load volatile float, float addrspace(1)* %gep2 + %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid + %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid + %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid + %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %a = load volatile float, ptr addrspace(1) %gep0 + %b = load volatile float, ptr addrspace(1) %gep1 + %c = load volatile float, ptr addrspace(1) %gep2 %a.nnan = fadd nnan float %a, 1.0 %b.nnan = fadd nnan float %b, 2.0 @@ -676,7 +676,7 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, %tmp1 = call float @llvm.maxnum.f32(float %a.nnan, float %b.nnan) %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c.nnan) %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) - store float %med3, float addrspace(1)* %outgep + store float %med3, ptr addrspace(1) %outgep ret void } @@ -685,7 +685,7 @@ define amdgpu_kernel void @v_nnan_inputs_med3_f32_pat0(float addrspace(1)* %out, ; Negative patterns ; --------------------------------------------------------------------- -define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) #1 { +define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr, ptr addrspace(1) %cptr) #1 { ; SI-LABEL: v_test_safe_med3_f32_pat0_multi_use0: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x9 @@ -825,19 +825,19 @@ define amdgpu_kernel void @v_test_safe_med3_f32_pat0_multi_use0(float addrspace( ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid - %gep1 = getelementptr float, float addrspace(1)* %bptr, i32 %tid - %gep2 = getelementptr float, float addrspace(1)* %cptr, i32 %tid - %outgep = getelementptr float, float addrspace(1)* %out, i32 %tid - %a = load volatile float, float addrspace(1)* %gep0 - %b = load volatile float, float addrspace(1)* %gep1 - %c = load volatile float, float addrspace(1)* %gep2 + %gep0 = getelementptr float, ptr addrspace(1) %aptr, i32 %tid + %gep1 = getelementptr float, ptr addrspace(1) %bptr, i32 %tid + %gep2 = getelementptr float, ptr addrspace(1) %cptr, i32 %tid + %outgep = getelementptr float, ptr addrspace(1) %out, i32 %tid + %a = load volatile float, ptr addrspace(1) %gep0 + %b = load volatile float, ptr addrspace(1) %gep1 + %c = load volatile float, ptr addrspace(1) %gep2 %tmp0 = call float @llvm.minnum.f32(float %a, float %b) - store volatile float %tmp0, float addrspace(1)* undef + store volatile float %tmp0, ptr addrspace(1) undef %tmp1 = call float @llvm.maxnum.f32(float %a, float %b) %tmp2 = call float @llvm.minnum.f32(float %tmp1, float %c) %med3 = call float @llvm.maxnum.f32(float %tmp0, float %tmp2) - store float %med3, float addrspace(1)* %outgep + store float %med3, ptr addrspace(1) %outgep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll index 23b7c9d129aaf..10e6b3f1b47b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmin_legacy.ll @@ -212,7 +212,7 @@ define float @v_test_fmin_legacy_ule_f32_multi_use(float %a, float %b) { %cmp = fcmp 
ule float %a, %b %val0 = select i1 %cmp, float %a, float %b %val1 = zext i1 %cmp to i32 - store i32 %val1, i32 addrspace(3)* undef + store i32 %val1, ptr addrspace(3) undef ret float %val0 } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll index 1aaf515cc1257..31d35df6d0990 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -march=amdgcn -mcpu=gfx940 -global-isel -verify-machineinstrs | FileCheck %s -check-prefix=GFX940 -declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32(float* %ptr, float %data) -declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) +declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data) +declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %ptr, <2 x half> %data) ; bf16 atomics use v2i16 argument since there is no bf16 data type in the llvm. -declare <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0v2i16(<2 x i16>* %ptr, <2 x i16> %data) -declare <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1v2i16(<2 x i16> addrspace(1)* %ptr, <2 x i16> %data) -declare <2 x half> @llvm.amdgcn.ds.fadd.v2f16(<2 x half> addrspace(3) * %ptr, <2 x half> %data, i32, i32, i1) -declare <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(<2 x i16> addrspace(3) * %ptr, <2 x i16> %data) +declare <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %ptr, <2 x i16> %data) +declare <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1) %ptr, <2 x i16> %data) +declare <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32, i32, i1) +declare <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data) -define amdgpu_kernel void @flat_atomic_fadd_f32_noret(float* %ptr, float %data) { +define amdgpu_kernel void @flat_atomic_fadd_f32_noret(ptr %ptr, float %data) { ; GFX940-LABEL: flat_atomic_fadd_f32_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -20,11 +20,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret(float* %ptr, float %data) ; GFX940-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NEXT: flat_atomic_add_f32 v[0:1], v2 ; GFX940-NEXT: s_endpgm - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32(float* %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data) ret void } -define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(float* %ptr) { +define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) { ; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -49,11 +49,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(float* %ptr) { ; GFX940-NEXT: s_cbranch_execnz .LBB1_1 ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX940-NEXT: s_endpgm - %ret = atomicrmw fadd float* %ptr, float 4.0 seq_cst + %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst ret void } -define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(float* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 { ; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat_ieee: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -78,22 +78,22 @@ define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(float* 
%ptr) #0 { ; GFX940-NEXT: s_cbranch_execnz .LBB2_1 ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX940-NEXT: s_endpgm - %ret = atomicrmw fadd float* %ptr, float 4.0 seq_cst + %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst ret void } -define float @flat_atomic_fadd_f32_rtn(float* %ptr, float %data) { +define float @flat_atomic_fadd_f32_rtn(ptr %ptr, float %data) { ; GFX940-LABEL: flat_atomic_fadd_f32_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_add_f32 v0, v[0:1], v2 sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32(float* %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data) ret float %ret } -define float @flat_atomic_fadd_f32_rtn_pat(float* %ptr, float %data) { +define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) { ; GFX940-LABEL: flat_atomic_fadd_f32_rtn_pat: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -117,11 +117,11 @@ define float @flat_atomic_fadd_f32_rtn_pat(float* %ptr, float %data) { ; GFX940-NEXT: s_or_b64 exec, exec, s[0:1] ; GFX940-NEXT: v_mov_b32_e32 v0, v2 ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = atomicrmw fadd float* %ptr, float 4.0 seq_cst + %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst ret float %ret } -define amdgpu_kernel void @flat_atomic_fadd_v2f16_noret(<2 x half>* %ptr, <2 x half> %data) { +define amdgpu_kernel void @flat_atomic_fadd_v2f16_noret(ptr %ptr, <2 x half> %data) { ; GFX940-LABEL: flat_atomic_fadd_v2f16_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -131,22 +131,22 @@ define amdgpu_kernel void @flat_atomic_fadd_v2f16_noret(<2 x half>* %ptr, <2 x h ; GFX940-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NEXT: flat_atomic_pk_add_f16 v[0:1], v2 ; GFX940-NEXT: s_endpgm - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %ptr, <2 x half> %data) ret void } -define <2 x half> @flat_atomic_fadd_v2f16_rtn(<2 x half>* %ptr, <2 x half> %data) { +define <2 x half> @flat_atomic_fadd_v2f16_rtn(ptr %ptr, <2 x half> %data) { ; GFX940-LABEL: flat_atomic_fadd_v2f16_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_pk_add_f16 v0, v[0:1], v2 sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0v2f16.v2f16(<2 x half>* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %ptr, <2 x half> %data) ret <2 x half> %ret } -define amdgpu_kernel void @flat_atomic_fadd_v2bf16_noret(<2 x i16>* %ptr, <2 x i16> %data) { +define amdgpu_kernel void @flat_atomic_fadd_v2bf16_noret(ptr %ptr, <2 x i16> %data) { ; GFX940-LABEL: flat_atomic_fadd_v2bf16_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 @@ -156,22 +156,22 @@ define amdgpu_kernel void @flat_atomic_fadd_v2bf16_noret(<2 x i16>* %ptr, <2 x i ; GFX940-NEXT: v_mov_b32_e32 v2, s4 ; GFX940-NEXT: flat_atomic_pk_add_bf16 v[0:1], v2 ; GFX940-NEXT: s_endpgm - %ret = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0v2i16(<2 x i16>* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %ptr, <2 x i16> %data) ret void } -define <2 x i16> @flat_atomic_fadd_v2bf16_rtn(<2 x i16>* 
%ptr, <2 x i16> %data) { +define <2 x i16> @flat_atomic_fadd_v2bf16_rtn(ptr %ptr, <2 x i16> %data) { ; GFX940-LABEL: flat_atomic_fadd_v2bf16_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: flat_atomic_pk_add_bf16 v0, v[0:1], v2 sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0v2i16(<2 x i16>* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %ptr, <2 x i16> %data) ret <2 x i16> %ret } -define amdgpu_kernel void @global_atomic_fadd_v2bf16_noret(<2 x i16> addrspace(1)* %ptr, <2 x i16> %data) { +define amdgpu_kernel void @global_atomic_fadd_v2bf16_noret(ptr addrspace(1) %ptr, <2 x i16> %data) { ; GFX940-LABEL: global_atomic_fadd_v2bf16_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dword s4, s[0:1], 0x2c @@ -181,22 +181,22 @@ define amdgpu_kernel void @global_atomic_fadd_v2bf16_noret(<2 x i16> addrspace(1 ; GFX940-NEXT: v_mov_b32_e32 v0, s4 ; GFX940-NEXT: global_atomic_pk_add_bf16 v1, v0, s[2:3] ; GFX940-NEXT: s_endpgm - %ret = call <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1v2i16(<2 x i16> addrspace(1)* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1) %ptr, <2 x i16> %data) ret void } -define <2 x i16> @global_atomic_fadd_v2bf16_rtn(<2 x i16> addrspace(1)* %ptr, <2 x i16> %data) { +define <2 x i16> @global_atomic_fadd_v2bf16_rtn(ptr addrspace(1) %ptr, <2 x i16> %data) { ; GFX940-LABEL: global_atomic_fadd_v2bf16_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: global_atomic_pk_add_bf16 v0, v[0:1], v2, off sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1v2i16(<2 x i16> addrspace(1)* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1) %ptr, <2 x i16> %data) ret <2 x i16> %ret } -define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(<2 x half> addrspace(3)* %ptr, <2 x half> %data) { +define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(ptr addrspace(3) %ptr, <2 x half> %data) { ; GFX940-LABEL: local_atomic_fadd_v2f16_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -205,22 +205,22 @@ define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(<2 x half> addrspace(3) ; GFX940-NEXT: v_mov_b32_e32 v1, s1 ; GFX940-NEXT: ds_pk_add_f16 v0, v1 ; GFX940-NEXT: s_endpgm - %ret = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(<2 x half> addrspace(3)* %ptr, <2 x half> %data, i32 0, i32 0, i1 0) + %ret = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32 0, i32 0, i1 0) ret void } -define <2 x half> @local_atomic_fadd_v2f16_rtn(<2 x half> addrspace(3)* %ptr, <2 x half> %data) { +define <2 x half> @local_atomic_fadd_v2f16_rtn(ptr addrspace(3) %ptr, <2 x half> %data) { ; GFX940-LABEL: local_atomic_fadd_v2f16_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: ds_pk_add_rtn_f16 v0, v0, v1 ; GFX940-NEXT: s_waitcnt lgkmcnt(0) ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(<2 x half> addrspace(3)* %ptr, <2 x half> %data, i32 0, i32 0, i1 0) + %ret = call <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32 0, i32 0, i1 0) ret <2 x half> %ret } -define amdgpu_kernel void 
@local_atomic_fadd_v2bf16_noret(<2 x i16> addrspace(3)* %ptr, <2 x i16> %data) { +define amdgpu_kernel void @local_atomic_fadd_v2bf16_noret(ptr addrspace(3) %ptr, <2 x i16> %data) { ; GFX940-LABEL: local_atomic_fadd_v2bf16_noret: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -233,11 +233,11 @@ define amdgpu_kernel void @local_atomic_fadd_v2bf16_noret(<2 x i16> addrspace(3) ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 ; GFX940-NEXT: s_endpgm - %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(<2 x i16> addrspace(3)* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data) ret void } -define <2 x i16> @local_atomic_fadd_v2bf16_rtn(<2 x i16> addrspace(3)* %ptr, <2 x i16> %data) { +define <2 x i16> @local_atomic_fadd_v2bf16_rtn(ptr addrspace(3) %ptr, <2 x i16> %data) { ; GFX940-LABEL: local_atomic_fadd_v2bf16_rtn: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -247,7 +247,7 @@ define <2 x i16> @local_atomic_fadd_v2bf16_rtn(<2 x i16> addrspace(3)* %ptr, <2 ; GFX940-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX940-NEXT: buffer_inv sc0 sc1 ; GFX940-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(<2 x i16> addrspace(3)* %ptr, <2 x i16> %data) + %ret = call <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data) ret <2 x i16> %ret } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll index 517fb872752a2..7db9d3769a743 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll @@ -7,13 +7,13 @@ declare double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double, <4 x i32>, i32 declare double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32 immarg) declare double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32, i32 immarg) declare double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32 immarg) -declare double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) -declare double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) -declare double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data) -declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data) -declare double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* nocapture, double, i32, i32, i1) +declare double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) +declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) +declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) +declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data) +declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) +declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) +declare double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) nocapture, double, i32, i32, i1) define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) { ; 
GFX90A-LABEL: raw_buffer_atomic_add_noret_f64: @@ -40,11 +40,11 @@ define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -61,7 +61,7 @@ define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inre ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -90,11 +90,11 @@ define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -111,7 +111,7 @@ define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> i ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -140,11 +140,11 @@ define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -161,7 +161,7 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inre ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -190,11 +190,11 @@ define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> 
inreg %rsrc, d ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -211,7 +211,7 @@ define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> i ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -240,11 +240,11 @@ define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, doub ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -261,7 +261,7 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inre ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } @@ -290,11 +290,11 @@ define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, d ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0) - store double %ret, double* undef + store double %ret, ptr undef ret void } -define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) { +define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, ptr addrspace(1) %out) { ; GFX90A-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 @@ -311,11 +311,11 @@ define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> i ; GFX90A-NEXT: s_endpgm main_body: %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2) - store double %ret, double addrspace(1)* %out, align 8 + store double %ret, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret(double addrspace(1)* %ptr, double %data) { +define amdgpu_kernel void @global_atomic_fadd_f64_noret(ptr addrspace(1) %ptr, double 
%data) { ; GFX90A-LABEL: global_atomic_fadd_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -325,11 +325,11 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret(double addrspace(1)* %pt ; GFX90A-NEXT: global_atomic_add_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_kernel void @global_atomic_fmin_f64_noret(double addrspace(1)* %ptr, double %data) { +define amdgpu_kernel void @global_atomic_fmin_f64_noret(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fmin_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -339,11 +339,11 @@ define amdgpu_kernel void @global_atomic_fmin_f64_noret(double addrspace(1)* %pt ; GFX90A-NEXT: global_atomic_min_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_kernel void @global_atomic_fmax_f64_noret(double addrspace(1)* %ptr, double %data) { +define amdgpu_kernel void @global_atomic_fmax_f64_noret(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fmax_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -353,11 +353,11 @@ define amdgpu_kernel void @global_atomic_fmax_f64_noret(double addrspace(1)* %pt ; GFX90A-NEXT: global_atomic_max_f64 v2, v[0:1], s[0:1] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)* %ptr) #1 { +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(ptr addrspace(1) %ptr) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -384,11 +384,11 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)* ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(double addrspace(1)* %ptr) #1 { +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(ptr addrspace(1) %ptr) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -401,11 +401,11 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(double addrspa ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrspace(1)* %ptr) #1 { +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(ptr addrspace(1) %ptr) #1 
{ ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_system: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -432,11 +432,11 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrsp ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("one-as") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst ret void } -define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(double addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(ptr addrspace(1) %ptr) #0 { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_flush: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -449,11 +449,11 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(double addrspa ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst ret void } -define double @global_atomic_fadd_f64_rtn(double addrspace(1)* %ptr, double %data) { +define double @global_atomic_fadd_f64_rtn(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fadd_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -461,11 +461,11 @@ define double @global_atomic_fadd_f64_rtn(double addrspace(1)* %ptr, double %dat ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double %data) #1 { +define double @global_atomic_fadd_f64_rtn_pat(ptr addrspace(1) %ptr, double %data) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -492,11 +492,11 @@ define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double ; GFX90A-NEXT: v_mov_b32_e32 v1, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 seq_cst ret double %ret } -define double @global_atomic_fadd_f64_rtn_pat_agent(double addrspace(1)* %ptr, double %data) #1 { +define double @global_atomic_fadd_f64_rtn_pat_agent(ptr addrspace(1) %ptr, double %data) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_agent: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -508,11 +508,11 @@ define double @global_atomic_fadd_f64_rtn_pat_agent(double addrspace(1)* %ptr, d ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst ret double %ret } -define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr, double %data) #1 { +define double @global_atomic_fadd_f64_rtn_pat_system(ptr addrspace(1) %ptr, double %data) #1 { ; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_system: ; GFX90A: ; %bb.0: ; 
%main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -539,11 +539,11 @@ define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr, ; GFX90A-NEXT: v_mov_b32_e32 v1, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("one-as") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("one-as") seq_cst ret double %ret } -define double @global_atomic_fmax_f64_rtn(double addrspace(1)* %ptr, double %data) { +define double @global_atomic_fmax_f64_rtn(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fmax_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -551,11 +551,11 @@ define double @global_atomic_fmax_f64_rtn(double addrspace(1)* %ptr, double %dat ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define double @global_atomic_fmin_f64_rtn(double addrspace(1)* %ptr, double %data) { +define double @global_atomic_fmin_f64_rtn(ptr addrspace(1) %ptr, double %data) { ; GFX90A-LABEL: global_atomic_fmin_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -563,11 +563,11 @@ define double @global_atomic_fmin_f64_rtn(double addrspace(1)* %ptr, double %dat ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double addrspace(1)* %ptr) { +define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(ptr addrspace(1) %ptr) { ; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -592,11 +592,11 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double ad ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double 4.0 syncscope("agent") seq_cst ret void } -define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 { +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -623,11 +623,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 { ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst ret void } -define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(double* %ptr) #1 { +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -641,11 +641,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(double* %ptr) #1 
; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst ret void } -define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) #1 { +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_system: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -673,11 +673,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) # ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("one-as") seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst ret void } -define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 { +define double @flat_atomic_fadd_f64_rtn_pat(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -704,11 +704,11 @@ define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 { ; GFX90A-NEXT: v_mov_b32_e32 v1, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 seq_cst ret double %ret } -define double @flat_atomic_fadd_f64_rtn_pat_agent(double* %ptr) #1 { +define double @flat_atomic_fadd_f64_rtn_pat_agent(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_agent: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -720,11 +720,11 @@ define double @flat_atomic_fadd_f64_rtn_pat_agent(double* %ptr) #1 { ; GFX90A-NEXT: buffer_wbinvl1_vol ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst ret double %ret } -define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 { +define double @flat_atomic_fadd_f64_rtn_pat_system(ptr %ptr) #1 { ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_system: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -752,11 +752,11 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 { ; GFX90A-NEXT: v_mov_b32_e32 v1, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("one-as") seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("one-as") seq_cst ret double %ret } -define amdgpu_kernel void @flat_atomic_fadd_f64_noret(double* %ptr, double %data) { +define amdgpu_kernel void @flat_atomic_fadd_f64_noret(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -766,11 +766,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret(double* %ptr, double %data ; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data) ret void } -define double @flat_atomic_fadd_f64_rtn(double* %ptr, double %data) { +define double @flat_atomic_fadd_f64_rtn(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fadd_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -778,11 +778,11 @@ define double @flat_atomic_fadd_f64_rtn(double* %ptr, double %data) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data) ret double %ret } -define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %ptr) { +define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(ptr %ptr) { ; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -807,11 +807,11 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %pt ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("agent") seq_cst + %ret = atomicrmw fadd ptr %ptr, double 4.0 syncscope("agent") seq_cst ret void } -define amdgpu_kernel void @flat_atomic_fmin_f64_noret(double* %ptr, double %data) { +define amdgpu_kernel void @flat_atomic_fmin_f64_noret(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fmin_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -821,11 +821,11 @@ define amdgpu_kernel void @flat_atomic_fmin_f64_noret(double* %ptr, double %data ; GFX90A-NEXT: flat_atomic_min_f64 v[0:1], v[2:3] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) ret void } -define double @flat_atomic_fmin_f64_rtn(double* %ptr, double %data) { +define double @flat_atomic_fmin_f64_rtn(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fmin_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -833,11 +833,11 @@ define double @flat_atomic_fmin_f64_rtn(double* %ptr, double %data) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data) ret double %ret } -define amdgpu_kernel void @flat_atomic_fmax_f64_noret(double* %ptr, double %data) { +define amdgpu_kernel void @flat_atomic_fmax_f64_noret(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fmax_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -847,11 +847,11 @@ define amdgpu_kernel void @flat_atomic_fmax_f64_noret(double* %ptr, double %data ; GFX90A-NEXT: flat_atomic_max_f64 v[0:1], v[2:3] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) ret void } -define double @flat_atomic_fmax_f64_rtn(double* %ptr, double %data) { +define double @flat_atomic_fmax_f64_rtn(ptr %ptr, double %data) { ; GFX90A-LABEL: flat_atomic_fmax_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -859,11 +859,11 @@ define double @flat_atomic_fmax_f64_rtn(double* %ptr, double %data) { ; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double 
@llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data) ret double %ret } -define amdgpu_kernel void @local_atomic_fadd_f64_noret(double addrspace(3)* %ptr, double %data) { +define amdgpu_kernel void @local_atomic_fadd_f64_noret(ptr addrspace(3) %ptr, double %data) { ; GFX90A-LABEL: local_atomic_fadd_f64_noret: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dword s4, s[0:1], 0x24 @@ -874,11 +874,11 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret(double addrspace(3)* %ptr ; GFX90A-NEXT: ds_add_f64 v2, v[0:1] ; GFX90A-NEXT: s_endpgm main_body: - %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0) + %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret void } -define double @local_atomic_fadd_f64_rtn(double addrspace(3)* %ptr, double %data) { +define double @local_atomic_fadd_f64_rtn(ptr addrspace(3) %ptr, double %data) { ; GFX90A-LABEL: local_atomic_fadd_f64_rtn: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -888,11 +888,11 @@ define double @local_atomic_fadd_f64_rtn(double addrspace(3)* %ptr, double %data ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0) + %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret double %ret } -define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(double addrspace(3)* %ptr) #1 { +define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(ptr addrspace(3) %ptr) #1 { ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24 @@ -905,11 +905,11 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(double addrspace(3)* ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret void } -define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(double addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(ptr addrspace(3) %ptr) #0 { ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dword s0, s[0:1], 0x24 @@ -922,11 +922,11 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(double addrspac ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret void } -define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double addrspace(3)* %ptr) #4 { +define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(ptr addrspace(3) %ptr) #4 { ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_load_dword s2, s[0:1], 0x24 @@ -950,11 +950,11 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double add ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_endpgm main_body: - %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret void } -define double 
@local_atomic_fadd_f64_rtn_pat(double addrspace(3)* %ptr, double %data) #1 { +define double @local_atomic_fadd_f64_rtn_pat(ptr addrspace(3) %ptr, double %data) #1 { ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_pat: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -965,11 +965,11 @@ define double @local_atomic_fadd_f64_rtn_pat(double addrspace(3)* %ptr, double % ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst + %ret = atomicrmw fadd ptr addrspace(3) %ptr, double 4.0 seq_cst ret double %ret } -define double @local_atomic_fadd_f64_rtn_ieee_unsafe(double addrspace(3)* %ptr, double %data) #2 { +define double @local_atomic_fadd_f64_rtn_ieee_unsafe(ptr addrspace(3) %ptr, double %data) #2 { ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -979,11 +979,11 @@ define double @local_atomic_fadd_f64_rtn_ieee_unsafe(double addrspace(3)* %ptr, ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0) + %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret double %ret } -define double @local_atomic_fadd_f64_rtn_ieee_safe(double addrspace(3)* %ptr, double %data) #3 { +define double @local_atomic_fadd_f64_rtn_ieee_safe(ptr addrspace(3) %ptr, double %data) #3 { ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_safe: ; GFX90A: ; %bb.0: ; %main_body ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -993,7 +993,7 @@ define double @local_atomic_fadd_f64_rtn_ieee_safe(double addrspace(3)* %ptr, do ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] main_body: - %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0) + %ret = call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %ptr, double %data, i32 0, i32 0, i1 0) ret double %ret } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll index 1eaae2cfff9d6..7197dc663eded 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/frem.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -amdgpu-scalarize-global-loads=false -enable-misched=0 -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck --check-prefix=CI %s ; RUN: llc -global-isel -amdgpu-scalarize-global-loads=false -enable-misched=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s -define amdgpu_kernel void @frem_f16(half addrspace(1)* %out, half addrspace(1)* %in1, half addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_f16: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -56,15 +56,15 @@ define amdgpu_kernel void @frem_f16(half addrspace(1)* %out, half addrspace(1)* ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: flat_store_short v[0:1], v2 ; VI-NEXT: s_endpgm - %gep2 = getelementptr half, half addrspace(1)* %in2, i32 4 - %r0 = load half, half addrspace(1)* %in1, align 4 - %r1 = load half, half addrspace(1)* %gep2, align 4 + %gep2 = getelementptr half, ptr addrspace(1) %in2, i32 4 + %r0 = load half, ptr addrspace(1) %in1, align 4 + %r1 = load half, ptr addrspace(1) %gep2, align 4 
%r2 = frem half %r0, %r1 - store half %r2, half addrspace(1)* %out, align 4 + store half %r2, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @fast_frem_f16(half addrspace(1)* %out, half addrspace(1)* %in1, half addrspace(1)* %in2) #0 { +define amdgpu_kernel void @fast_frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: fast_frem_f16: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -102,15 +102,15 @@ define amdgpu_kernel void @fast_frem_f16(half addrspace(1)* %out, half addrspace ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: flat_store_short v[0:1], v2 ; VI-NEXT: s_endpgm - %gep2 = getelementptr half, half addrspace(1)* %in2, i32 4 - %r0 = load half, half addrspace(1)* %in1, align 4 - %r1 = load half, half addrspace(1)* %gep2, align 4 + %gep2 = getelementptr half, ptr addrspace(1) %in2, i32 4 + %r0 = load half, ptr addrspace(1) %in1, align 4 + %r1 = load half, ptr addrspace(1) %gep2, align 4 %r2 = frem fast half %r0, %r1 - store half %r2, half addrspace(1)* %out, align 4 + store half %r2, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @unsafe_frem_f16(half addrspace(1)* %out, half addrspace(1)* %in1, half addrspace(1)* %in2) #1 { +define amdgpu_kernel void @unsafe_frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #1 { ; CI-LABEL: unsafe_frem_f16: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -148,15 +148,15 @@ define amdgpu_kernel void @unsafe_frem_f16(half addrspace(1)* %out, half addrspa ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: flat_store_short v[0:1], v2 ; VI-NEXT: s_endpgm - %gep2 = getelementptr half, half addrspace(1)* %in2, i32 4 - %r0 = load half, half addrspace(1)* %in1, align 4 - %r1 = load half, half addrspace(1)* %gep2, align 4 + %gep2 = getelementptr half, ptr addrspace(1) %in2, i32 4 + %r0 = load half, ptr addrspace(1) %in1, align 4 + %r1 = load half, ptr addrspace(1) %gep2, align 4 %r2 = frem half %r0, %r1 - store half %r2, half addrspace(1)* %out, align 4 + store half %r2, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_f32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -214,15 +214,15 @@ define amdgpu_kernel void @frem_f32(float addrspace(1)* %out, float addrspace(1) ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm - %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4 - %r0 = load float, float addrspace(1)* %in1, align 4 - %r1 = load float, float addrspace(1)* %gep2, align 4 + %gep2 = getelementptr float, ptr addrspace(1) %in2, i32 4 + %r0 = load float, ptr addrspace(1) %in1, align 4 + %r1 = load float, ptr addrspace(1) %gep2, align 4 %r2 = frem float %r0, %r1 - store float %r2, float addrspace(1)* %out, align 4 + store float %r2, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @fast_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) #0 { +define amdgpu_kernel void @fast_frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: fast_frem_f32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -258,15 +258,15 @@ define amdgpu_kernel void @fast_frem_f32(float addrspace(1)* %out, float addrspa ; VI-NEXT: v_mov_b32_e32 v1, 
s5 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm - %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4 - %r0 = load float, float addrspace(1)* %in1, align 4 - %r1 = load float, float addrspace(1)* %gep2, align 4 + %gep2 = getelementptr float, ptr addrspace(1) %in2, i32 4 + %r0 = load float, ptr addrspace(1) %in1, align 4 + %r1 = load float, ptr addrspace(1) %gep2, align 4 %r2 = frem fast float %r0, %r1 - store float %r2, float addrspace(1)* %out, align 4 + store float %r2, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @unsafe_frem_f32(float addrspace(1)* %out, float addrspace(1)* %in1, float addrspace(1)* %in2) #1 { +define amdgpu_kernel void @unsafe_frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #1 { ; CI-LABEL: unsafe_frem_f32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -302,15 +302,15 @@ define amdgpu_kernel void @unsafe_frem_f32(float addrspace(1)* %out, float addrs ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm - %gep2 = getelementptr float, float addrspace(1)* %in2, i32 4 - %r0 = load float, float addrspace(1)* %in1, align 4 - %r1 = load float, float addrspace(1)* %gep2, align 4 + %gep2 = getelementptr float, ptr addrspace(1) %in2, i32 4 + %r0 = load float, ptr addrspace(1) %in1, align 4 + %r1 = load float, ptr addrspace(1) %gep2, align 4 %r2 = frem float %r0, %r1 - store float %r2, float addrspace(1)* %out, align 4 + store float %r2, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -366,14 +366,14 @@ define amdgpu_kernel void @frem_f64(double addrspace(1)* %out, double addrspace( ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm - %r0 = load double, double addrspace(1)* %in1, align 8 - %r1 = load double, double addrspace(1)* %in2, align 8 + %r0 = load double, ptr addrspace(1) %in1, align 8 + %r1 = load double, ptr addrspace(1) %in2, align 8 %r2 = frem double %r0, %r1 - store double %r2, double addrspace(1)* %out, align 8 + store double %r2, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @fast_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, double addrspace(1)* %in2) #0 { +define amdgpu_kernel void @fast_frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: fast_frem_f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -423,14 +423,14 @@ define amdgpu_kernel void @fast_frem_f64(double addrspace(1)* %out, double addrs ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm - %r0 = load double, double addrspace(1)* %in1, align 8 - %r1 = load double, double addrspace(1)* %in2, align 8 + %r0 = load double, ptr addrspace(1) %in1, align 8 + %r1 = load double, ptr addrspace(1) %in2, align 8 %r2 = frem fast double %r0, %r1 - store double %r2, double addrspace(1)* %out, align 8 + store double %r2, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double addrspace(1)* %in1, +define amdgpu_kernel void @unsafe_frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ; CI-LABEL: unsafe_frem_f64: ; CI: ; %bb.0: ; 
CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -480,15 +480,15 @@ define amdgpu_kernel void @unsafe_frem_f64(double addrspace(1)* %out, double add ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm - double addrspace(1)* %in2) #1 { - %r0 = load double, double addrspace(1)* %in1, align 8 - %r1 = load double, double addrspace(1)* %in2, align 8 + ptr addrspace(1) %in2) #1 { + %r0 = load double, ptr addrspace(1) %in1, align 8 + %r1 = load double, ptr addrspace(1) %in2, align 8 %r2 = frem double %r0, %r1 - store double %r2, double addrspace(1)* %out, align 8 + store double %r2, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @frem_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in1, <2 x half> addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_v2f16: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -580,15 +580,15 @@ define amdgpu_kernel void @frem_v2f16(<2 x half> addrspace(1)* %out, <2 x half> ; VI-NEXT: v_mov_b32_e32 v1, s5 ; VI-NEXT: flat_store_dword v[0:1], v2 ; VI-NEXT: s_endpgm - %gep2 = getelementptr <2 x half>, <2 x half> addrspace(1)* %in2, i32 4 - %r0 = load <2 x half>, <2 x half> addrspace(1)* %in1, align 8 - %r1 = load <2 x half>, <2 x half> addrspace(1)* %gep2, align 8 + %gep2 = getelementptr <2 x half>, ptr addrspace(1) %in2, i32 4 + %r0 = load <2 x half>, ptr addrspace(1) %in1, align 8 + %r1 = load <2 x half>, ptr addrspace(1) %gep2, align 8 %r2 = frem <2 x half> %r0, %r1 - store <2 x half> %r2, <2 x half> addrspace(1)* %out, align 8 + store <2 x half> %r2, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @frem_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in1, <4 x half> addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_v4f16: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -744,15 +744,15 @@ define amdgpu_kernel void @frem_v4f16(<4 x half> addrspace(1)* %out, <4 x half> ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm - %gep2 = getelementptr <4 x half>, <4 x half> addrspace(1)* %in2, i32 4 - %r0 = load <4 x half>, <4 x half> addrspace(1)* %in1, align 16 - %r1 = load <4 x half>, <4 x half> addrspace(1)* %gep2, align 16 + %gep2 = getelementptr <4 x half>, ptr addrspace(1) %in2, i32 4 + %r0 = load <4 x half>, ptr addrspace(1) %in1, align 16 + %r1 = load <4 x half>, ptr addrspace(1) %gep2, align 16 %r2 = frem <4 x half> %r0, %r1 - store <4 x half> %r2, <4 x half> addrspace(1)* %out, align 16 + store <4 x half> %r2, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in1, <2 x float> addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_v2f32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -842,15 +842,15 @@ define amdgpu_kernel void @frem_v2f32(<2 x float> addrspace(1)* %out, <2 x float ; VI-NEXT: v_mov_b32_e32 v3, s5 ; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; VI-NEXT: s_endpgm - %gep2 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in2, i32 4 - %r0 = load <2 x float>, <2 x float> addrspace(1)* %in1, align 8 - %r1 = load <2 x float>, <2 x float> addrspace(1)* %gep2, 
align 8 + %gep2 = getelementptr <2 x float>, ptr addrspace(1) %in2, i32 4 + %r0 = load <2 x float>, ptr addrspace(1) %in1, align 8 + %r1 = load <2 x float>, ptr addrspace(1) %gep2, align 8 %r2 = frem <2 x float> %r0, %r1 - store <2 x float> %r2, <2 x float> addrspace(1)* %out, align 8 + store <2 x float> %r2, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in1, <4 x float> addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_v4f32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -1004,15 +1004,15 @@ define amdgpu_kernel void @frem_v4f32(<4 x float> addrspace(1)* %out, <4 x float ; VI-NEXT: v_mov_b32_e32 v5, s5 ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm - %gep2 = getelementptr <4 x float>, <4 x float> addrspace(1)* %in2, i32 4 - %r0 = load <4 x float>, <4 x float> addrspace(1)* %in1, align 16 - %r1 = load <4 x float>, <4 x float> addrspace(1)* %gep2, align 16 + %gep2 = getelementptr <4 x float>, ptr addrspace(1) %in2, i32 4 + %r0 = load <4 x float>, ptr addrspace(1) %in1, align 16 + %r1 = load <4 x float>, ptr addrspace(1) %gep2, align 16 %r2 = frem <4 x float> %r0, %r1 - store <4 x float> %r2, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %r2, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %in1, <2 x double> addrspace(1)* %in2) #0 { +define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ptr addrspace(1) %in2) #0 { ; CI-LABEL: frem_v2f64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -1098,11 +1098,11 @@ define amdgpu_kernel void @frem_v2f64(<2 x double> addrspace(1)* %out, <2 x doub ; VI-NEXT: v_mov_b32_e32 v5, s5 ; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: s_endpgm - %gep2 = getelementptr <2 x double>, <2 x double> addrspace(1)* %in2, i32 4 - %r0 = load <2 x double>, <2 x double> addrspace(1)* %in1, align 16 - %r1 = load <2 x double>, <2 x double> addrspace(1)* %gep2, align 16 + %gep2 = getelementptr <2 x double>, ptr addrspace(1) %in2, i32 4 + %r0 = load <2 x double>, ptr addrspace(1) %in1, align 16 + %r1 = load <2 x double>, ptr addrspace(1) %gep2, align 16 %r2 = frem <2 x double> %r0, %r1 - store <2 x double> %r2, <2 x double> addrspace(1)* %out, align 16 + store <2 x double> %r2, ptr addrspace(1) %out, align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll index 75b2d1969673b..9de3335b3075c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -7,11 +7,11 @@ define i1 @i1_func_void() #0 { ; CHECK-LABEL: name: i1_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i1, i1 addrspace(1)* undef + %val = load i1, ptr addrspace(1) undef ret i1 %val } @@ -19,11 +19,11 @@ define zeroext i1 
@i1_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i1, i1 addrspace(1)* undef + %val = load i1, ptr addrspace(1) undef ret i1 %val } @@ -31,11 +31,11 @@ define signext i1 @i1_signext_func_void() #0 { ; CHECK-LABEL: name: i1_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i1, i1 addrspace(1)* undef + %val = load i1, ptr addrspace(1) undef ret i1 %val } @@ -43,11 +43,11 @@ define i7 @i7_func_void() #0 { ; CHECK-LABEL: name: i7_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i7, i7 addrspace(1)* undef + %val = load i7, ptr addrspace(1) undef ret i7 %val } @@ -55,11 +55,11 @@ define zeroext i7 @i7_zeroext_func_void() #0 { ; CHECK-LABEL: name: i7_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7) ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i7, i7 addrspace(1)* undef + %val = load i7, ptr addrspace(1) undef ret i7 %val } @@ -67,11 +67,11 @@ define signext i7 @i7_signext_func_void() #0 { ; CHECK-LABEL: name: i7_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `i7 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load (s7) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7) ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i7, i7 addrspace(1)* undef + %val = load i7, ptr addrspace(1) undef ret i7 %val } @@ -79,11 +79,11 @@ define i8 @i8_func_void() #0 { ; CHECK-LABEL: name: i8_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) 
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i8, i8 addrspace(1)* undef + %val = load i8, ptr addrspace(1) undef ret i8 %val } @@ -91,11 +91,11 @@ define zeroext i8 @i8_zeroext_func_void() #0 { ; CHECK-LABEL: name: i8_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8) ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i8, i8 addrspace(1)* undef + %val = load i8, ptr addrspace(1) undef ret i8 %val } @@ -103,11 +103,11 @@ define signext i8 @i8_signext_func_void() #0 { ; CHECK-LABEL: name: i8_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8) ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i8, i8 addrspace(1)* undef + %val = load i8, ptr addrspace(1) undef ret i8 %val } @@ -115,11 +115,11 @@ define i16 @i16_func_void() #0 { ; CHECK-LABEL: name: i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i16, i16 addrspace(1)* undef + %val = load i16, ptr addrspace(1) undef ret i16 %val } @@ -127,11 +127,11 @@ define zeroext i16 @i16_zeroext_func_void() #0 { ; CHECK-LABEL: name: i16_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i16, i16 addrspace(1)* undef + %val = load i16, ptr addrspace(1) undef ret i16 %val } @@ -139,11 +139,11 @@ define signext i16 @i16_signext_func_void() #0 { ; CHECK-LABEL: name: i16_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s16) ; CHECK-NEXT: $vgpr0 
= COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i16, i16 addrspace(1)* undef + %val = load i16, ptr addrspace(1) undef ret i16 %val } @@ -151,11 +151,11 @@ define half @f16_func_void() #0 { ; CHECK-LABEL: name: f16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load half, half addrspace(1)* undef + %val = load half, ptr addrspace(1) undef ret half %val } @@ -163,11 +163,11 @@ define i24 @i24_func_void() #0 { ; CHECK-LABEL: name: i24_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s24) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i24, i24 addrspace(1)* undef + %val = load i24, ptr addrspace(1) undef ret i24 %val } @@ -175,11 +175,11 @@ define zeroext i24 @i24_zeroext_func_void() #0 { ; CHECK-LABEL: name: i24_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s24) ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i24, i24 addrspace(1)* undef + %val = load i24, ptr addrspace(1) undef ret i24 %val } @@ -187,11 +187,11 @@ define signext i24 @i24_signext_func_void() #0 { ; CHECK-LABEL: name: i24_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s24) = G_LOAD [[DEF]](p1) :: (load (s24) from `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s24) ; CHECK-NEXT: $vgpr0 = COPY [[SEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i24, i24 addrspace(1)* undef + %val = load i24, ptr addrspace(1) undef ret i24 %val } @@ -199,14 +199,14 @@ define <2 x i24> @v2i24_func_void() #0 { ; CHECK-LABEL: name: v2i24_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s24>) = G_LOAD [[DEF]](p1) :: (load (<2 x s24>) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<2 x s24>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) ; CHECK-NEXT: 
[[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load <2 x i24>, <2 x i24> addrspace(1)* undef + %val = load <2 x i24>, ptr addrspace(1) undef ret <2 x i24> %val } @@ -214,7 +214,7 @@ define <3 x i24> @v3i24_func_void() #0 { ; CHECK-LABEL: name: v3i24_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s24>) = G_LOAD [[DEF]](p1) :: (load (<3 x s24>) from `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s24), [[UV1:%[0-9]+]]:_(s24), [[UV2:%[0-9]+]]:_(s24) = G_UNMERGE_VALUES [[LOAD]](<3 x s24>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s24) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s24) @@ -223,7 +223,7 @@ define <3 x i24> @v3i24_func_void() #0 { ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %val = load <3 x i24>, <3 x i24> addrspace(1)* undef + %val = load <3 x i24>, ptr addrspace(1) undef ret <3 x i24> %val } @@ -231,10 +231,10 @@ define i32 @i32_func_void() #0 { ; CHECK-LABEL: name: i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load i32, i32 addrspace(1)* undef + %val = load i32, ptr addrspace(1) undef ret i32 %val } @@ -242,13 +242,13 @@ define i48 @i48_func_void() #0 { ; CHECK-LABEL: name: i48_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load i48, i48 addrspace(1)* undef, align 8 + %val = load i48, ptr addrspace(1) undef, align 8 ret i48 %val } @@ -256,13 +256,13 @@ define signext i48 @i48_signext_func_void() #0 { ; CHECK-LABEL: name: i48_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN 
implicit $vgpr0, implicit $vgpr1 - %val = load i48, i48 addrspace(1)* undef, align 8 + %val = load i48, ptr addrspace(1) undef, align 8 ret i48 %val } @@ -270,13 +270,13 @@ define zeroext i48 @i48_zeroext_func_void() #0 { ; CHECK-LABEL: name: i48_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load i48, i48 addrspace(1)* undef, align 8 + %val = load i48, ptr addrspace(1) undef, align 8 ret i48 %val } @@ -284,12 +284,12 @@ define i64 @i64_func_void() #0 { ; CHECK-LABEL: name: i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load i64, i64 addrspace(1)* undef + %val = load i64, ptr addrspace(1) undef ret i64 %val } @@ -297,14 +297,14 @@ define i65 @i65_func_void() #0 { ; CHECK-LABEL: name: i65_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %val = load i65, i65 addrspace(1)* undef + %val = load i65, ptr addrspace(1) undef ret i65 %val } @@ -312,14 +312,14 @@ define signext i65 @i65_signext_func_void() #0 { ; CHECK-LABEL: name: i65_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %val = load i65, i65 addrspace(1)* undef + %val = 
load i65, ptr addrspace(1) undef ret i65 %val } @@ -327,14 +327,14 @@ define zeroext i65 @i65_zeroext_func_void() #0 { ; CHECK-LABEL: name: i65_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `i65 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load (s65) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %val = load i65, i65 addrspace(1)* undef + %val = load i65, ptr addrspace(1) undef ret i65 %val } @@ -342,10 +342,10 @@ define float @f32_func_void() #0 { ; CHECK-LABEL: name: f32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `float addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (load (s32) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load float, float addrspace(1)* undef + %val = load float, ptr addrspace(1) undef ret float %val } @@ -353,12 +353,12 @@ define double @f64_func_void() #0 { ; CHECK-LABEL: name: f64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `double addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[DEF]](p1) :: (load (s64) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load double, double addrspace(1)* undef + %val = load double, ptr addrspace(1) undef ret double %val } @@ -366,14 +366,14 @@ define <2 x double> @v2f64_func_void() #0 { ; CHECK-LABEL: name: v2f64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %val = load <2 x double>, <2 x double> addrspace(1)* undef + %val = load <2 x double>, ptr addrspace(1) undef ret <2 x double> %val } @@ -381,12 +381,12 @@ define <2 x i32> @v2i32_func_void() #0 { ; CHECK-LABEL: name: v2i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: 
[[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: (load (<2 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load <2 x i32>, <2 x i32> addrspace(1)* undef + %val = load <2 x i32>, ptr addrspace(1) undef ret <2 x i32> %val } @@ -394,13 +394,13 @@ define <3 x i32> @v3i32_func_void() #0 { ; CHECK-LABEL: name: v3i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[DEF]](p1) :: (load (<3 x s32>) from `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<3 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %val = load <3 x i32>, <3 x i32> addrspace(1)* undef + %val = load <3 x i32>, ptr addrspace(1) undef ret <3 x i32> %val } @@ -408,14 +408,14 @@ define <4 x i32> @v4i32_func_void() #0 { ; CHECK-LABEL: name: v4i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: (load (<4 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<4 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %val = load <4 x i32>, <4 x i32> addrspace(1)* undef + %val = load <4 x i32>, ptr addrspace(1) undef ret <4 x i32> %val } @@ -423,7 +423,7 @@ define <5 x i32> @v5i32_func_void() #0 { ; CHECK-LABEL: name: v5i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<5 x s32>) from `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<5 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) @@ -431,7 +431,7 @@ define <5 x i32> @v5i32_func_void() #0 { ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4 - %val = load volatile <5 x i32>, <5 x i32> 
addrspace(1)* undef + %val = load volatile <5 x i32>, ptr addrspace(1) undef ret <5 x i32> %val } @@ -439,7 +439,7 @@ define <8 x i32> @v8i32_func_void() #0 { ; CHECK-LABEL: name: v8i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -451,8 +451,8 @@ define <8 x i32> @v8i32_func_void() #0 { ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef - %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <8 x i32>, ptr addrspace(1) %ptr ret <8 x i32> %val } @@ -460,7 +460,7 @@ define <16 x i32> @v16i32_func_void() #0 { ; CHECK-LABEL: name: v16i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -480,8 +480,8 @@ define <16 x i32> @v16i32_func_void() #0 { ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 - %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef - %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i32>, ptr addrspace(1) %ptr ret <16 x i32> %val } @@ -489,7 +489,7 @@ define <32 x i32> @v32i32_func_void() #0 { ; CHECK-LABEL: name: v32i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; 
CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -525,8 +525,8 @@ define <32 x i32> @v32i32_func_void() #0 { ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 - %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef - %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <32 x i32>, ptr addrspace(1) %ptr ret <32 x i32> %val } @@ -534,14 +534,14 @@ define <2 x i64> @v2i64_func_void() #0 { ; CHECK-LABEL: name: v2i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[DEF]](p1) :: (load (<2 x s64>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %val = load <2 x i64>, <2 x i64> addrspace(1)* undef + %val = load <2 x i64>, ptr addrspace(1) undef ret <2 x i64> %val } @@ -549,7 +549,7 @@ define <3 x i64> @v3i64_func_void() #0 { ; CHECK-LABEL: name: v3i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant 
load (p1) from `<3 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<3 x s64>) from %ir.ptr, align 32, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<3 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -559,8 +559,8 @@ define <3 x i64> @v3i64_func_void() #0 { ; CHECK-NEXT: $vgpr4 = COPY [[UV4]](s32) ; CHECK-NEXT: $vgpr5 = COPY [[UV5]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef - %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <3 x i64>, ptr addrspace(1) %ptr ret <3 x i64> %val } @@ -568,7 +568,7 @@ define <4 x i64> @v4i64_func_void() #0 { ; CHECK-LABEL: name: v4i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<4 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s64>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<4 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -580,8 +580,8 @@ define <4 x i64> @v4i64_func_void() #0 { ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](s32) ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef - %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <4 x i64>, ptr addrspace(1) %ptr ret <4 x i64> %val } @@ -589,7 +589,7 @@ define <5 x i64> @v5i64_func_void() #0 { ; CHECK-LABEL: name: v5i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<5 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s64>) from %ir.ptr, align 64, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<5 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -603,8 +603,8 @@ define <5 x i64> 
@v5i64_func_void() #0 { ; CHECK-NEXT: $vgpr8 = COPY [[UV8]](s32) ; CHECK-NEXT: $vgpr9 = COPY [[UV9]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9 - %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef - %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <5 x i64>, ptr addrspace(1) %ptr ret <5 x i64> %val } @@ -612,7 +612,7 @@ define <8 x i64> @v8i64_func_void() #0 { ; CHECK-LABEL: name: v8i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<8 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s64>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<8 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -632,8 +632,8 @@ define <8 x i64> @v8i64_func_void() #0 { ; CHECK-NEXT: $vgpr14 = COPY [[UV14]](s32) ; CHECK-NEXT: $vgpr15 = COPY [[UV15]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 - %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef - %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <8 x i64>, ptr addrspace(1) %ptr ret <8 x i64> %val } @@ -641,7 +641,7 @@ define <16 x i64> @v16i64_func_void() #0 { ; CHECK-LABEL: name: v16i64_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<16 x i64> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s64>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s64>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), 
[[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<16 x s64>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -677,8 +677,8 @@ define <16 x i64> @v16i64_func_void() #0 { ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 - %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef - %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i64>, ptr addrspace(1) %ptr ret <16 x i64> %val } @@ -686,10 +686,10 @@ define <2 x i16> @v2i16_func_void() #0 { ; CHECK-LABEL: name: v2i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load <2 x i16>, <2 x i16> addrspace(1)* undef + %val = load <2 x i16>, ptr addrspace(1) undef ret <2 x i16> %val } @@ -697,10 +697,10 @@ define <2 x half> @v2f16_func_void() #0 { ; CHECK-LABEL: name: v2f16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: (load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: $vgpr0 = COPY [[LOAD]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %val = load <2 x half>, <2 x half> addrspace(1)* undef + %val = load <2 x half>, ptr addrspace(1) undef ret <2 x half> %val } @@ -708,7 +708,7 @@ define <3 x i16> @v3i16_func_void() #0 { ; CHECK-LABEL: name: v3i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: (load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD]](<3 x s16>) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[DEF1]](s16) @@ -716,7 +716,7 @@ define <3 x i16> @v3i16_func_void() #0 { ; CHECK-NEXT: $vgpr0 = COPY [[UV3]](<2 x s16>) ; 
CHECK-NEXT: $vgpr1 = COPY [[UV4]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load <3 x i16>, <3 x i16> addrspace(1)* undef + %val = load <3 x i16>, ptr addrspace(1) undef ret <3 x i16> %val } @@ -724,12 +724,12 @@ define <4 x i16> @v4i16_func_void() #0 { ; CHECK-LABEL: name: v4i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load <4 x i16>, <4 x i16> addrspace(1)* undef + %val = load <4 x i16>, ptr addrspace(1) undef ret <4 x i16> %val } @@ -737,12 +737,12 @@ define <4 x half> @v4f16_func_void() #0 { ; CHECK-LABEL: name: v4f16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: (load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD]](<4 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load <4 x half>, <4 x half> addrspace(1)* undef + %val = load <4 x half>, ptr addrspace(1) undef ret <4 x half> %val } @@ -750,7 +750,7 @@ define <5 x i16> @v5i16_func_void() #0 { ; CHECK-LABEL: name: v5i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<5 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<5 x s16>) from %ir.ptr, align 16, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[LOAD1]](<5 x s16>) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF @@ -760,8 +760,8 @@ define <5 x i16> @v5i16_func_void() #0 { ; CHECK-NEXT: $vgpr1 = COPY [[UV6]](<2 x s16>) ; CHECK-NEXT: $vgpr2 = COPY [[UV7]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef - %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <5 x i16>, ptr addrspace(1) %ptr ret <5 x i16> %val } @@ -769,7 +769,7 @@ define <8 x i16> @v8i16_func_void() #0 { ; CHECK-LABEL: name: v8i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<8 x i16> addrspace(1)* 
addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<8 x s16>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<8 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) @@ -777,8 +777,8 @@ define <8 x i16> @v8i16_func_void() #0 { ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](<2 x s16>) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef - %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <8 x i16>, ptr addrspace(1) %ptr ret <8 x i16> %val } @@ -786,7 +786,7 @@ define <16 x i16> @v16i16_func_void() #0 { ; CHECK-LABEL: name: v16i16_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<16 x i16> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s16>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>), [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[LOAD1]](<16 x s16>) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) @@ -798,8 +798,8 @@ define <16 x i16> @v16i16_func_void() #0 { ; CHECK-NEXT: $vgpr6 = COPY [[UV6]](<2 x s16>) ; CHECK-NEXT: $vgpr7 = COPY [[UV7]](<2 x s16>) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef - %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i16>, ptr addrspace(1) %ptr ret <16 x i16> %val } @@ -807,7 +807,7 @@ define <16 x i8> @v16i8_func_void() #0 { ; CHECK-LABEL: name: v16i8_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<16 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8), [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = 
G_UNMERGE_VALUES [[LOAD1]](<16 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -859,8 +859,8 @@ define <16 x i8> @v16i8_func_void() #0 { ; CHECK-NEXT: [[ANYEXT31:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT15]](s16) ; CHECK-NEXT: $vgpr15 = COPY [[ANYEXT31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15 - %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef - %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i8>, ptr addrspace(1) %ptr ret <16 x i8> %val } @@ -868,7 +868,7 @@ define <2 x i8> @v2i8_func_void() #0 { ; CHECK-LABEL: name: v2i8_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `<2 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[DEF]](p1) :: (load (<2 x s8>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<2 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) @@ -877,7 +877,7 @@ define <2 x i8> @v2i8_func_void() #0 { ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT1]](s16) ; CHECK-NEXT: $vgpr1 = COPY [[ANYEXT3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load <2 x i8>, <2 x i8> addrspace(1)* undef + %val = load <2 x i8>, ptr addrspace(1) undef ret <2 x i8> %val } @@ -885,7 +885,7 @@ define <3 x i8> @v3i8_func_void() #0 { ; CHECK-LABEL: name: v3i8_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[DEF]](p1) :: (load (<3 x s8>) from `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD]](<3 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) @@ -897,7 +897,7 @@ define <3 x i8> @v3i8_func_void() #0 { ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT2]](s16) ; CHECK-NEXT: $vgpr2 = COPY [[ANYEXT5]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - %val = load <3 x i8>, <3 x i8> addrspace(1)* undef + %val = load <3 x i8>, ptr addrspace(1) undef ret <3 x i8> %val } @@ -905,7 +905,7 @@ define <4 x i8> @v4i8_func_void() #0 { ; CHECK-LABEL: name: v4i8_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: (load (<4 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: 
[[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[LOAD1]](<4 x s8>) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV]](s8) @@ -921,8 +921,8 @@ define <4 x i8> @v4i8_func_void() #0 { ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[ANYEXT3]](s16) ; CHECK-NEXT: $vgpr3 = COPY [[ANYEXT7]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef - %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <4 x i8>, ptr addrspace(1) %ptr ret <4 x i8> %val } @@ -930,39 +930,38 @@ define {i8, i32} @struct_i8_i32_func_void() #0 { ; CHECK-LABEL: name: struct_i8_i32_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load (s8) from `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32) from `ptr addrspace(1) undef` + 4, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8) ; CHECK-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[LOAD1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef + %val = load { i8, i32 }, ptr addrspace(1) undef ret { i8, i32 } %val } -define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %arg0) #0 { +define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[COPY1]](p1) :: (volatile load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (volatile load (s32) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) - ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.gep01, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.arg0, addrspace 5) ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) ; CHECK-NEXT: SI_RETURN - %val0 = load volatile i8, i8 addrspace(1)* undef - %val1 = load volatile i32, i32 addrspace(1)* undef - %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0 
- %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1 - store i8 %val0, i8 addrspace(5)* %gep0 - store i32 %val1, i32 addrspace(5)* %gep1 + %val0 = load volatile i8, ptr addrspace(1) undef + %val1 = load volatile i32, ptr addrspace(1) undef + %gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 0 + %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %arg0, i32 0, i32 1 + store i8 %val0, ptr addrspace(5) %gep0 + store i32 %val1, ptr addrspace(5) %gep1 ret void } @@ -977,16 +976,16 @@ define <33 x i32> @v33i32_func_void() #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `<33 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<33 x s32>) from %ir.ptr, align 256, addrspace 1) ; CHECK-NEXT: G_STORE [[LOAD1]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) ; CHECK-NEXT: SI_RETURN - %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef - %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load <33 x i32>, ptr addrspace(1) %ptr ret <33 x i32> %val } -define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) #0 { +define <33 x i32> @v33i32_func_v33i32_i32(ptr addrspace(1) %p, i32 %idx) #0 { ; CHECK-LABEL: name: v33i32_func_v33i32_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -1004,8 +1003,8 @@ define <33 x i32> @v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[COPY4]](p1) :: (load (<33 x s32>) from %ir.gep, align 256, addrspace 1) ; CHECK-NEXT: G_STORE [[LOAD]](<33 x s32>), [[COPY]](p5) :: (store (<33 x s32>), align 256, addrspace 5) ; CHECK-NEXT: SI_RETURN - %gep = getelementptr inbounds <33 x i32>, <33 x i32> addrspace(1)* %p, i32 %idx - %val = load <33 x i32>, <33 x i32> addrspace(1)* %gep + %gep = getelementptr inbounds <33 x i32>, ptr addrspace(1) %p, i32 %idx + %val = load <33 x i32>, ptr addrspace(1) %gep ret <33 x i32> %val } @@ -1016,7 +1015,7 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `{ <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: (load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -1026,8 +1025,8 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[PTR_ADD1]](p5) :: (store (s32), align 128, addrspace 5) ; CHECK-NEXT: SI_RETURN - %ptr = 
load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef - %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load { <32 x i32>, i32 }, ptr addrspace(1) %ptr ret { <32 x i32>, i32 }%val } @@ -1038,7 +1037,7 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `{ i32, <32 x i32> } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (volatile invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p1) :: (load (s32) from %ir.ptr, align 128, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -1048,8 +1047,8 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) ; CHECK-NEXT: G_STORE [[LOAD2]](<32 x s32>), [[PTR_ADD1]](p5) :: (store (<32 x s32>), addrspace 5) ; CHECK-NEXT: SI_RETURN - %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef - %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr + %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef + %val = load { i32, <32 x i32> }, ptr addrspace(1) %ptr ret { i32, <32 x i32> }%val } @@ -1063,10 +1062,10 @@ define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) @@ -1076,10 +1075,10 @@ define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 { ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, 
implicit $vgpr2, implicit $vgpr3 - %load0 = load volatile i32, i32 addrspace(3)* undef - %load1 = load volatile i32, i32 addrspace(3)* undef - %load2 = load volatile i32, i32 addrspace(3)* undef - %load3 = load volatile i32, i32 addrspace(3)* undef + %load0 = load volatile i32, ptr addrspace(3) undef + %load1 = load volatile i32, ptr addrspace(3) undef + %load2 = load volatile i32, ptr addrspace(3) undef + %load3 = load volatile i32, ptr addrspace(3) undef %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0 %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1 @@ -1093,16 +1092,15 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ; CHECK-LABEL: name: v3f32_struct_func_void_wasted_reg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p3) :: (volatile load (s32) from `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p3) :: (volatile load (s32) from `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[DEF1]], [[LOAD]](s32), [[C]](s32) ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC]], [[LOAD1]](s32), [[C1]](s32) ; CHECK-NEXT: [[IVEC2:%[0-9]+]]:_(<3 x s32>) = G_INSERT_VECTOR_ELT [[IVEC1]], [[LOAD2]](s32), [[C2]](s32) @@ -1112,10 +1110,10 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[LOAD3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %load0 = load volatile float, float addrspace(3)* undef - %load1 = load volatile float, float addrspace(3)* undef - %load2 = load volatile float, float addrspace(3)* undef - %load3 = load volatile i32, i32 addrspace(3)* undef + %load0 = load volatile float, ptr addrspace(3) undef + %load1 = load volatile float, ptr addrspace(3) undef + %load2 = load volatile float, ptr addrspace(3) undef + %load3 = load volatile i32, ptr addrspace(3) undef %insert.0 = insertelement <3 x float> undef, float %load0, i32 0 %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1 @@ -1125,7 +1123,7 @@ define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 { ret { <3 x float>, i32 } %insert.4 } -define void @void_func_sret_max_known_zero_bits(i8 
addrspace(5)* sret(i8) %arg0) #0 { +define void @void_func_sret_max_known_zero_bits(ptr addrspace(5) sret(i8) %arg0) #0 { ; CHECK-LABEL: name: void_func_sret_max_known_zero_bits ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 @@ -1139,19 +1137,19 @@ define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C]](s32) ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C1]](s32) ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[PTRTOINT]], [[C2]](s32) - ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR1]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LSHR2]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN - %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32 + %arg0.int = ptrtoint ptr addrspace(5) %arg0 to i32 %lshr0 = lshr i32 %arg0.int, 16 %lshr1 = lshr i32 %arg0.int, 17 %lshr2 = lshr i32 %arg0.int, 18 - store volatile i32 %lshr0, i32 addrspace(3)* undef - store volatile i32 %lshr1, i32 addrspace(3)* undef - store volatile i32 %lshr2, i32 addrspace(3)* undef + store volatile i32 %lshr0, ptr addrspace(3) undef + store volatile i32 %lshr1, ptr addrspace(3) undef + store volatile i32 %lshr2, ptr addrspace(3) undef ret void } @@ -1159,7 +1157,7 @@ define i1022 @i1022_func_void() #0 { ; CHECK-LABEL: name: i1022_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -1195,7 +1193,7 @@ define i1022 @i1022_func_void() #0 { ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit 
$vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 - %val = load i1022, i1022 addrspace(1)* undef + %val = load i1022, ptr addrspace(1) undef ret i1022 %val } @@ -1203,7 +1201,7 @@ define signext i1022 @i1022_signext_func_void() #0 { ; CHECK-LABEL: name: i1022_signext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -1239,7 +1237,7 @@ define signext i1022 @i1022_signext_func_void() #0 { ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 - %val = load i1022, i1022 addrspace(1)* undef + %val = load i1022, ptr addrspace(1) undef ret i1022 %val } @@ -1247,7 +1245,7 @@ define zeroext i1022 @i1022_zeroext_func_void() #0 { ; CHECK-LABEL: name: i1022_zeroext_func_void ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `i1022 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load (s1022) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022) ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), 
[[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) @@ -1283,11 +1281,11 @@ define zeroext i1022 @i1022_zeroext_func_void() #0 { ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[UV31]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31 - %val = load i1022, i1022 addrspace(1)* undef + %val = load i1022, ptr addrspace(1) undef ret i1022 %val } -%struct.with.ptrs = type { <32 x i32>, i32 addrspace(3)*, i32 addrspace(1)*, <2 x i8 addrspace(1)*> } +%struct.with.ptrs = type { <32 x i32>, ptr addrspace(3), ptr addrspace(1), <2 x ptr addrspace(1)> } define %struct.with.ptrs @ptr_in_struct_func_void() #0 { ; CHECK-LABEL: name: ptr_in_struct_func_void @@ -1296,16 +1294,16 @@ define %struct.with.ptrs @ptr_in_struct_func_void() #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `%struct.with.ptrs addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[DEF]](p1) :: (volatile load (<32 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `%struct.with.ptrs addrspace(1)* undef` + 128, align 128, addrspace 1) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p1) :: (volatile load (p3) from `ptr addrspace(1) undef` + 128, align 128, addrspace 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `%struct.with.ptrs addrspace(1)* undef` + 136, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p1) :: (volatile load (p1) from `ptr addrspace(1) undef` + 136, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 144 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `%struct.with.ptrs addrspace(1)* undef` + 144, addrspace 1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(<2 x p1>) = 
G_LOAD [[PTR_ADD2]](p1) :: (volatile load (<2 x p1>) from `ptr addrspace(1) undef` + 144, addrspace 1) ; CHECK-NEXT: G_STORE [[LOAD]](<32 x s32>), [[COPY]](p5) :: (store (<32 x s32>), addrspace 5) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C3]](s32) @@ -1317,7 +1315,7 @@ define %struct.with.ptrs @ptr_in_struct_func_void() #0 { ; CHECK-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C5]](s32) ; CHECK-NEXT: G_STORE [[LOAD3]](<2 x p1>), [[PTR_ADD5]](p5) :: (store (<2 x p1>), addrspace 5) ; CHECK-NEXT: SI_RETURN - %val = load volatile %struct.with.ptrs, %struct.with.ptrs addrspace(1)* undef + %val = load volatile %struct.with.ptrs, ptr addrspace(1) undef ret %struct.with.ptrs %val } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.v2i65.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.v2i65.ll index 16e5aff2b1103..09df9400197e6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.v2i65.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.v2i65.ll @@ -2,6 +2,6 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=irtranslator -verify-machineinstrs -o - %s define <2 x i65> @v2i65_func_void() #0 { - %val = load <2 x i65>, <2 x i65> addrspace(1)* undef + %val = load <2 x i65>, ptr addrspace(1) undef ret <2 x i65> %val } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll index 58db5db52a475..4441bacdb58db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll @@ -4,7 +4,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s -define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_intrinsic(float addrspace(1)* %ptr, float %data) { +define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_intrinsic(ptr addrspace(1) %ptr, float %data) { ; GFX908_GFX11-LABEL: name: global_atomic_fadd_f32_no_rtn_intrinsic ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -25,11 +25,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_intrinsic(float addrspace(1 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret void } -define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_intrinsic(float addrspace(1)* inreg %ptr, float %data) { +define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_intrinsic(ptr addrspace(1) inreg %ptr, float %data) { ; GFX908_GFX11-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_intrinsic ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -52,11 +52,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_intrinsic(float addrs ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = 
V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret void } -define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_flat_intrinsic(float addrspace(1)* %ptr, float %data) { +define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_flat_intrinsic(ptr addrspace(1) %ptr, float %data) { ; GFX908_GFX11-LABEL: name: global_atomic_fadd_f32_no_rtn_flat_intrinsic ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -77,11 +77,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_flat_intrinsic(float addrsp ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret void } -define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic(float addrspace(1)* inreg %ptr, float %data) { +define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, float %data) { ; GFX908_GFX11-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -104,11 +104,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_flat_intrinsic(float ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret void } -define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(float addrspace(1)* %ptr, float %data) #0 { +define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) #0 { ; GFX908_GFX11-LABEL: name: global_atomic_fadd_f32_no_rtn_atomicrmw ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -129,11 +129,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_no_rtn_atomicrmw(float addrspace(1 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret void } -define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(float addrspace(1)* inreg %ptr, float %data) #0 { +define 
amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, float %data) #0 { ; GFX908_GFX11-LABEL: name: global_atomic_fadd_f32_saddr_no_rtn_atomicrmw ; GFX908_GFX11: bb.1 (%ir-block.0): ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -156,11 +156,11 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(float addrs ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F32_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret void } -declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float) -declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float) +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float) +declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float) attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll index 0e97b51801b97..caa6307976fbc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s -define amdgpu_ps float @global_atomic_fadd_f32_rtn_intrinsic(float addrspace(1)* %ptr, float %data) { +define amdgpu_ps float @global_atomic_fadd_f32_rtn_intrinsic(ptr addrspace(1) %ptr, float %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -26,11 +26,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_intrinsic(float addrspace(1)* ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret float %ret } -define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_intrinsic(float addrspace(1)* inreg %ptr, float %data) { +define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_intrinsic(ptr addrspace(1) inreg %ptr, float %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -55,11 +55,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_intrinsic(float addrspa ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], 
[[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret float %ret } -define amdgpu_ps float @global_atomic_fadd_f32_rtn_flat_intrinsic(float addrspace(1)* %ptr, float %data) { +define amdgpu_ps float @global_atomic_fadd_f32_rtn_flat_intrinsic(ptr addrspace(1) %ptr, float %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -82,11 +82,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_flat_intrinsic(float addrspac ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret float %ret } -define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_flat_intrinsic(float addrspace(1)* inreg %ptr, float %data) { +define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, float %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -111,11 +111,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_flat_intrinsic(float ad ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)* inreg %ptr, float %data) + %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1) inreg %ptr, float %data) ret float %ret } -define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(float addrspace(1)* %ptr, float %data) #0 { +define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(ptr addrspace(1) %ptr, float %data) #0 { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -138,11 +138,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_rtn_atomicrmw(float addrspace(1)* ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret 
float %ret } -define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(float addrspace(1)* inreg %ptr, float %data) #0 { +define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, float %data) #0 { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f32_saddr_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -167,11 +167,11 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(float addrspa ; GFX11-NEXT: [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_F32_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr, addrspace 1) ; GFX11-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_ADD_F32_SADDR_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = atomicrmw fadd float addrspace(1)* %ptr, float %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, float %data syncscope("wavefront") monotonic ret float %ret } -declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float) -declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float) +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float) +declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float) attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll index 32f33683f0f5c..0c3fa41cb97fe 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_intrinsic(double addrspace(1)* %ptr, double %data) { +define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_intrinsic(ptr addrspace(1) %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -15,11 +15,11 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_intrinsic(double addrspace( ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_ps double @global_atomic_fadd_f64_rtn_intrinsic(double addrspace(1)* %ptr, double %data) { +define amdgpu_ps double @global_atomic_fadd_f64_rtn_intrinsic(ptr addrspace(1) %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -38,11 +38,11 @@ define amdgpu_ps 
double @global_atomic_fadd_f64_rtn_intrinsic(double addrspace(1 ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_intrinsic(double addrspace(1)* inreg %ptr, double %data) { +define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_intrinsic(ptr addrspace(1) inreg %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 @@ -56,11 +56,11 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_intrinsic(double addr ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_intrinsic(double addrspace(1)* inreg %ptr, double %data) { +define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_intrinsic(ptr addrspace(1) inreg %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 @@ -80,11 +80,11 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_intrinsic(double addrs ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_flat_intrinsic(double addrspace(1)* %ptr, double %data) { +define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_flat_intrinsic(ptr addrspace(1) %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -97,11 +97,11 @@ define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_flat_intrinsic(double addrs ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double 
@llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_ps double @global_atomic_fadd_f64_rtn_flat_intrinsic(double addrspace(1)* %ptr, double %data) { +define amdgpu_ps double @global_atomic_fadd_f64_rtn_flat_intrinsic(ptr addrspace(1) %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -120,11 +120,11 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_flat_intrinsic(double addrsp ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic(double addrspace(1)* inreg %ptr, double %data) { +define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 @@ -138,11 +138,11 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_flat_intrinsic(double ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret void } -define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_flat_intrinsic(double addrspace(1)* inreg %ptr, double %data) { +define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, double %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 @@ -162,11 +162,11 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_flat_intrinsic(double ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data) + %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data) ret double %ret } -define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(double addrspace(1)* %ptr, double %data) #0 { +define amdgpu_ps void @global_atomic_fadd_f64_no_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) #0 { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_no_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -179,11 +179,11 @@ define amdgpu_ps void 
@global_atomic_fadd_f64_no_rtn_atomicrmw(double addrspace( ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = atomicrmw fadd double addrspace(1)* %ptr, double %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret void } -define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(double addrspace(1)* %ptr, double %data) #0 { +define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(ptr addrspace(1) %ptr, double %data) #0 { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -202,11 +202,11 @@ define amdgpu_ps double @global_atomic_fadd_f64_rtn_atomicrmw(double addrspace(1 ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = atomicrmw fadd double addrspace(1)* %ptr, double %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret double %ret } -define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(double addrspace(1)* inreg %ptr, double %data) #0 { +define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) #0 { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_no_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 @@ -220,11 +220,11 @@ define amdgpu_ps void @global_atomic_fadd_f64_saddr_no_rtn_atomicrmw(double addr ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_ADD_F64_SADDR [[V_MOV_B32_e32_]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = atomicrmw fadd double addrspace(1)* %ptr, double %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret void } -define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(double addrspace(1)* inreg %ptr, double %data) #0 { +define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspace(1) inreg %ptr, double %data) #0 { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_f64_saddr_rtn_atomicrmw ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr1 @@ -244,11 +244,11 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(double addrs ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 - %ret = atomicrmw fadd double addrspace(1)* %ptr, double %data syncscope("wavefront") monotonic + %ret = atomicrmw fadd ptr addrspace(1) %ptr, double %data syncscope("wavefront") monotonic ret double 
%ret } -declare double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)*, double) -declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1f64.f64(double addrspace(1)*, double) +declare double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1), double) +declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1), double) attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll index 6fe7eef2b10d1..247ef8254e2df 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_intrinsic(<2 x half> addrspace(1)* %ptr, <2 x half> %data) { +define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_intrinsic(ptr addrspace(1) %ptr, <2 x half> %data) { ; GFX908-LABEL: name: global_atomic_fadd_v2f16_no_rtn_intrinsic ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -24,11 +24,11 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_intrinsic(<2 x half> addr ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void } -define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic(<2 x half> addrspace(1)* inreg %ptr, <2 x half> %data) { +define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic(ptr addrspace(1) inreg %ptr, <2 x half> %data) { ; GFX908-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -51,11 +51,11 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_intrinsic(<2 x half ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void } -define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_flat_intrinsic(<2 x half> addrspace(1)* %ptr, <2 x half> %data) { +define amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_flat_intrinsic(ptr addrspace(1) %ptr, <2 x half> %data) { ; GFX908-LABEL: name: global_atomic_fadd_v2f16_no_rtn_flat_intrinsic ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -76,11 +76,11 @@ define 
amdgpu_ps void @global_atomic_fadd_v2f16_no_rtn_flat_intrinsic(<2 x half> ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void } -define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic(<2 x half> addrspace(1)* inreg %ptr, <2 x half> %data) { +define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, <2 x half> %data) { ; GFX908-LABEL: name: global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic ; GFX908: bb.1 (%ir-block.0): ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -103,9 +103,9 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic(<2 x ; GFX90A_GFX940-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX90A_GFX940-NEXT: GLOBAL_ATOMIC_PK_ADD_F16_SADDR [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: S_ENDPGM 0 - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void } -declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>) -declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>) +declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>) +declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll index 3ee4d957a572b..1fa5f575808f2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s -define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_intrinsic(<2 x half> addrspace(1)* %ptr, <2 x half> %data) { +define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_intrinsic(ptr addrspace(1) %ptr, <2 x half> %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -14,11 +14,11 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_intrinsic(<2 x half> a ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] ; GFX90A_GFX940-NEXT: 
SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret } -define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_intrinsic(<2 x half> addrspace(1)* inreg %ptr, <2 x half> %data) { +define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_intrinsic(ptr addrspace(1) inreg %ptr, <2 x half> %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -31,11 +31,11 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_intrinsic(<2 x h ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret } -define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_flat_intrinsic(<2 x half> addrspace(1)* %ptr, <2 x half> %data) { +define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_flat_intrinsic(ptr addrspace(1) %ptr, <2 x half> %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -47,11 +47,11 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_rtn_flat_intrinsic(<2 x ha ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret } -define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_flat_intrinsic(<2 x half> addrspace(1)* inreg %ptr, <2 x half> %data) { +define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_flat_intrinsic(ptr addrspace(1) inreg %ptr, <2 x half> %data) { ; GFX90A_GFX940-LABEL: name: global_atomic_fadd_v2f16_saddr_rtn_flat_intrinsic ; GFX90A_GFX940: bb.1 (%ir-block.0): ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $vgpr0 @@ -64,9 +64,9 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_flat_intrinsic(< ; GFX90A_GFX940-NEXT: [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN [[V_MOV_B32_e32_]], [[COPY2]], [[REG_SEQUENCE]], 0, 1, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.ptr, addrspace 1) ; GFX90A_GFX940-NEXT: $vgpr0 = COPY [[GLOBAL_ATOMIC_PK_ADD_F16_SADDR_RTN]] ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call <2 x half> 
@llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret } -declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>) -declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>) +declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>) +declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll index a216a8b509e87..4eddf087bbec2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll @@ -6,10 +6,10 @@ @external_private = external addrspace(5) global i32, align 4 @internal_private = internal addrspace(5) global i32 undef, align 4 -define i32 addrspace(5)* @fn_external_private() { - ret i32 addrspace(5)* @external_private +define ptr addrspace(5) @fn_external_private() { + ret ptr addrspace(5) @external_private } -define i32 addrspace(5)* @fn_internal_private() { - ret i32 addrspace(5)* @internal_private +define ptr addrspace(5) @fn_internal_private() { + ret ptr addrspace(5) @internal_private } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll index 0812972204354..b60dd6dea7f79 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.ll @@ -13,7 +13,7 @@ @internal_other = internal addrspace(999) global i32 9, align 4 -define i32 addrspace(4)* @external_constant_got() { +define ptr addrspace(4) @external_constant_got() { ; GCN-LABEL: name: external_constant_got ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant + 4, target-flags(amdgpu-gotprel32-hi) @external_constant + 12, implicit-def $scc @@ -22,10 +22,10 @@ define i32 addrspace(4)* @external_constant_got() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - ret i32 addrspace(4)* @external_constant + ret ptr addrspace(4) @external_constant } -define i32 addrspace(1)* @external_global_got() { +define ptr addrspace(1) @external_global_got() { ; GCN-LABEL: name: external_global_got ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_global + 4, target-flags(amdgpu-gotprel32-hi) @external_global + 12, implicit-def $scc @@ -34,10 +34,10 @@ define i32 addrspace(1)* @external_global_got() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - ret i32 addrspace(1)* @external_global + ret ptr addrspace(1) @external_global } -define i32 addrspace(999)* @external_other_got() { +define ptr addrspace(999) @external_other_got() { ; GCN-LABEL: name: external_other_got ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_other + 4, target-flags(amdgpu-gotprel32-hi) @external_other + 12, implicit-def $scc @@ 
-46,10 +46,10 @@ define i32 addrspace(999)* @external_other_got() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - ret i32 addrspace(999)* @external_other + ret ptr addrspace(999) @external_other } -define i32 addrspace(4)* @internal_constant_pcrel() { +define ptr addrspace(4) @internal_constant_pcrel() { ; GCN-LABEL: name: internal_constant_pcrel ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant + 4, target-flags(amdgpu-rel32-hi) @internal_constant + 12, implicit-def $scc @@ -57,10 +57,10 @@ define i32 addrspace(4)* @internal_constant_pcrel() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - ret i32 addrspace(4)* @internal_constant + ret ptr addrspace(4) @internal_constant } -define i32 addrspace(1)* @internal_global_pcrel() { +define ptr addrspace(1) @internal_global_pcrel() { ; GCN-LABEL: name: internal_global_pcrel ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p1) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_global + 4, target-flags(amdgpu-rel32-hi) @internal_global + 12, implicit-def $scc @@ -68,10 +68,10 @@ define i32 addrspace(1)* @internal_global_pcrel() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - ret i32 addrspace(1)* @internal_global + ret ptr addrspace(1) @internal_global } -define i32 addrspace(999)* @internal_other_pcrel() { +define ptr addrspace(999) @internal_other_pcrel() { ; GCN-LABEL: name: internal_other_pcrel ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p999) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_other + 4, target-flags(amdgpu-rel32-hi) @internal_other + 12, implicit-def $scc @@ -79,10 +79,10 @@ define i32 addrspace(999)* @internal_other_pcrel() { ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - ret i32 addrspace(999)* @internal_other + ret ptr addrspace(999) @internal_other } -define i32 addrspace(6)* @external_constant32_got() { +define ptr addrspace(6) @external_constant32_got() { ; GCN-LABEL: name: external_constant32_got ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @external_constant32 + 4, target-flags(amdgpu-gotprel32-hi) @external_constant32 + 12, implicit-def $scc @@ -90,15 +90,15 @@ define i32 addrspace(6)* @external_constant32_got() { ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[LOAD]](p4), 0 ; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) ; GCN-NEXT: SI_RETURN implicit $vgpr0 - ret i32 addrspace(6)* @external_constant32 + ret ptr addrspace(6) @external_constant32 } -define i32 addrspace(6)* @internal_constant32_pcrel() { +define ptr addrspace(6) @internal_constant32_pcrel() { ; GCN-LABEL: name: internal_constant32_pcrel ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[SI_PC_ADD_REL_OFFSET:%[0-9]+]]:sreg_64(p4) = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-rel32-lo) @internal_constant32 + 4, target-flags(amdgpu-rel32-hi) @internal_constant32 + 12, implicit-def $scc ; GCN-NEXT: [[EXTRACT:%[0-9]+]]:_(p6) = G_EXTRACT [[SI_PC_ADD_REL_OFFSET]](p4), 0 ; GCN-NEXT: $vgpr0 = COPY [[EXTRACT]](p6) ; GCN-NEXT: SI_RETURN implicit 
$vgpr0 - ret i32 addrspace(6)* @internal_constant32 + ret ptr addrspace(6) @internal_constant32 } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll index d6c675a636e9a..21fa4afb374cd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll @@ -12,19 +12,19 @@ ; CHECK-LABEL: {{^}}dynamic_shared_array_0: ; CHECK: v_add_u32_e32 v{{[0-9]+}}, 0x800, v{{[0-9]+}} -define amdgpu_kernel void @dynamic_shared_array_0(float addrspace(1)* %out) { +define amdgpu_kernel void @dynamic_shared_array_0(ptr addrspace(1) %out) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() - %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %tid.x - %val0 = load float, float addrspace(3)* %arrayidx0, align 4 - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val0, float addrspace(3)* %arrayidx1, align 4 + %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %tid.x + %val0 = load float, ptr addrspace(3) %arrayidx0, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val0, ptr addrspace(3) %arrayidx1, align 4 ret void } ; CHECK-LABEL: {{^}}dynamic_shared_array_1: ; CHECK: v_lshlrev_b32_e32 [[IDX:v[0-9]+]], 2, {{v[0-9]+}} ; CHECK: v_add_u32_e32 {{v[0-9]+}}, 0xc00, [[IDX]] -define amdgpu_kernel void @dynamic_shared_array_1(float addrspace(1)* %out, i32 %cond) { +define amdgpu_kernel void @dynamic_shared_array_1(ptr addrspace(1) %out, i32 %cond) { entry: %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %idx.0 = add nsw i32 %tid.x, 64 @@ -32,19 +32,19 @@ entry: br i1 %tmp, label %if, label %else if: ; preds = %entry - %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0 - %val0 = load float, float addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0 + %val0 = load float, ptr addrspace(3) %arrayidx0, align 4 br label %endif else: ; preds = %entry - %arrayidx1 = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0 - %val1 = load float, float addrspace(3)* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [256 x float], ptr addrspace(3) @lds1, i32 0, i32 %idx.0 + %val1 = load float, ptr addrspace(3) %arrayidx1, align 4 br label %endif endif: ; preds = %else, %if %val = phi float [ %val0, %if ], [ %val1, %else ] - %arrayidx = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val, float addrspace(3)* %arrayidx, align 4 + %arrayidx = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val, ptr addrspace(3) %arrayidx, align 4 ret void } @@ -54,10 +54,10 @@ endif: ; preds = %else, %if define amdgpu_kernel void @dynamic_shared_array_2(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %vidx = add i32 %tid.x, %idx - %arrayidx0 = getelementptr inbounds [4096 x float], [4096 x float] addrspace(3)* @lds2, i32 0, i32 %vidx - %val0 = load float, float addrspace(3)* %arrayidx0, align 4 - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val0, float addrspace(3)* 
%arrayidx1, align 4 + %arrayidx0 = getelementptr inbounds [4096 x float], ptr addrspace(3) @lds2, i32 0, i32 %vidx + %val0 = load float, ptr addrspace(3) %arrayidx0, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val0, ptr addrspace(3) %arrayidx1, align 4 ret void } @@ -69,11 +69,11 @@ define amdgpu_kernel void @dynamic_shared_array_2(i32 %idx) { define amdgpu_kernel void @dynamic_shared_array_3(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %vidx = add i32 %tid.x, %idx - %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx - %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx + %val0 = load i8, ptr addrspace(3) %arrayidx0, align 4 %val1 = uitofp i8 %val0 to float - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val1, float addrspace(3)* %arrayidx1, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val1, ptr addrspace(3) %arrayidx1, align 4 ret void } @@ -85,14 +85,14 @@ define amdgpu_kernel void @dynamic_shared_array_3(i32 %idx) { define amdgpu_kernel void @dynamic_shared_array_4(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %vidx = add i32 %tid.x, %idx - %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx - %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx + %val0 = load i8, ptr addrspace(3) %arrayidx0, align 4 %val1 = uitofp i8 %val0 to float %val2 = uitofp i8 %val0 to double - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val1, float addrspace(3)* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared1, i32 0, i32 %tid.x - store double %val2, double addrspace(3)* %arrayidx2, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val1, ptr addrspace(3) %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_shared1, i32 0, i32 %tid.x + store double %val2, ptr addrspace(3) %arrayidx2, align 4 ret void } @@ -103,14 +103,14 @@ define amdgpu_kernel void @dynamic_shared_array_4(i32 %idx) { define amdgpu_kernel void @dynamic_shared_array_5(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %vidx = add i32 %tid.x, %idx - %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx - %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx + %val0 = load i8, ptr addrspace(3) %arrayidx0, align 4 %val1 = uitofp i8 %val0 to float %val2 = uitofp i8 %val0 to double - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val1, float addrspace(3)* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared2, i32 0, i32 %tid.x - store double %val2, double addrspace(3)* %arrayidx2, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) 
@dynamic_shared0, i32 0, i32 %tid.x + store float %val1, ptr addrspace(3) %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_shared2, i32 0, i32 %tid.x + store double %val2, ptr addrspace(3) %arrayidx2, align 4 ret void } @@ -121,14 +121,14 @@ define amdgpu_kernel void @dynamic_shared_array_5(i32 %idx) { define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() %vidx = add i32 %tid.x, %idx - %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx - %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4 + %arrayidx0 = getelementptr inbounds [67 x i8], ptr addrspace(3) @lds3, i32 0, i32 %vidx + %val0 = load i8, ptr addrspace(3) %arrayidx0, align 4 %val1 = uitofp i8 %val0 to float %val2 = uitofp i8 %val0 to double - %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x - store float %val1, float addrspace(3)* %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared3, i32 0, i32 %tid.x - store double %val2, double addrspace(3)* %arrayidx2, align 4 + %arrayidx1 = getelementptr inbounds [0 x float], ptr addrspace(3) @dynamic_shared0, i32 0, i32 %tid.x + store float %val1, ptr addrspace(3) %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds [0 x double], ptr addrspace(3) @dynamic_shared3, i32 0, i32 %tid.x + store double %val2, ptr addrspace(3) %arrayidx2, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll index 7828a632f9e46..a5c9b896eb0d0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll @@ -7,7 +7,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=4 < %s | FileCheck --check-prefixes=GFX9V4 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 --amdhsa-code-object-version=5 < %s | FileCheck --check-prefixes=GFX9V5 %s -define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 addrspace(3)* %ptr.local) { +define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) { ; GFX8V3-LABEL: addrspacecast: ; GFX8V3: ; %bb.0: ; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -159,14 +159,14 @@ define amdgpu_kernel void @addrspacecast(i32 addrspace(5)* %ptr.private, i32 add ; GFX9V5-NEXT: flat_store_dword v[0:1], v2 ; GFX9V5-NEXT: s_waitcnt vmcnt(0) ; GFX9V5-NEXT: s_endpgm - %flat.private = addrspacecast i32 addrspace(5)* %ptr.private to i32* - %flat.local = addrspacecast i32 addrspace(3)* %ptr.local to i32* - store volatile i32 1, i32* %flat.private - store volatile i32 2, i32* %flat.local + %flat.private = addrspacecast ptr addrspace(5) %ptr.private to ptr + %flat.local = addrspacecast ptr addrspace(3) %ptr.local to ptr + store volatile i32 1, ptr %flat.private + store volatile i32 2, ptr %flat.local ret void } -define amdgpu_kernel void @llvm_amdgcn_is_shared(i8* %ptr) { +define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) { ; GFX8V3-LABEL: llvm_amdgcn_is_shared: ; GFX8V3: ; %bb.0: ; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -244,13 +244,13 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(i8* %ptr) { ; GFX9V5-NEXT: global_store_dword 
v[0:1], v0, off ; GFX9V5-NEXT: s_waitcnt vmcnt(0) ; GFX9V5-NEXT: s_endpgm - %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %ptr) + %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %ptr) %zext = zext i1 %is.shared to i32 - store volatile i32 %zext, i32 addrspace(1)* undef + store volatile i32 %zext, ptr addrspace(1) undef ret void } -define amdgpu_kernel void @llvm_amdgcn_is_private(i8* %ptr) { +define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) { ; GFX8V3-LABEL: llvm_amdgcn_is_private: ; GFX8V3: ; %bb.0: ; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -328,9 +328,9 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(i8* %ptr) { ; GFX9V5-NEXT: global_store_dword v[0:1], v0, off ; GFX9V5-NEXT: s_waitcnt vmcnt(0) ; GFX9V5-NEXT: s_endpgm - %is.private = call i1 @llvm.amdgcn.is.private(i8* %ptr) + %is.private = call i1 @llvm.amdgcn.is.private(ptr %ptr) %zext = zext i1 %is.private to i32 - store volatile i32 %zext, i32 addrspace(1)* undef + store volatile i32 %zext, ptr addrspace(1) undef ret void } @@ -395,7 +395,7 @@ define amdgpu_kernel void @llvm_debugtrap() { unreachable } -define amdgpu_kernel void @llvm_amdgcn_queue_ptr(i64 addrspace(1)* %ptr) { +define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) { ; GFX8V3-LABEL: llvm_amdgcn_queue_ptr: ; GFX8V3: ; %bb.0: ; GFX8V3-NEXT: v_mov_b32_e32 v0, s6 @@ -521,22 +521,22 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(i64 addrspace(1)* %ptr) { ; GFX9V5-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GFX9V5-NEXT: s_waitcnt vmcnt(0) ; GFX9V5-NEXT: s_endpgm - %queue.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr() - %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() + %queue.ptr = call ptr addrspace(4) @llvm.amdgcn.queue.ptr() + %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %dispatch.ptr = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() %dispatch.id = call i64 @llvm.amdgcn.dispatch.id() - %queue.load = load volatile i8, i8 addrspace(4)* %queue.ptr - %implicitarg.load = load volatile i8, i8 addrspace(4)* %implicitarg.ptr - %dispatch.load = load volatile i8, i8 addrspace(4)* %dispatch.ptr - store volatile i64 %dispatch.id, i64 addrspace(1)* %ptr + %queue.load = load volatile i8, ptr addrspace(4) %queue.ptr + %implicitarg.load = load volatile i8, ptr addrspace(4) %implicitarg.ptr + %dispatch.load = load volatile i8, ptr addrspace(4) %dispatch.ptr + store volatile i64 %dispatch.id, ptr addrspace(1) %ptr ret void } -declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() -declare noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() +declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() +declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() declare i64 @llvm.amdgcn.dispatch.id() -declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() -declare i1 @llvm.amdgcn.is.shared(i8*) -declare i1 @llvm.amdgcn.is.private(i8*) +declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() +declare i1 @llvm.amdgcn.is.shared(ptr) +declare i1 @llvm.amdgcn.is.private(ptr) declare void @llvm.trap() declare void @llvm.debugtrap() diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll index 15b92df2f1601..9a5dc978e8331 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll @@ -4,12 +4,12 @@ ; ERR: remark: :0:0: 
unable to translate instruction: call: ' %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:12]}"()' (in function: return_type_is_too_big_vector) ; ERR: remark: :0:0: unable to translate instruction: call: ' %reg = call i64 asm sideeffect "; def $0", "={v8}"()' (in function: return_type_is_too_big_scalar) -; ERR: remark: :0:0: unable to translate instruction: call: ' %reg = call i8 addrspace(1)* asm sideeffect "; def $0", "={v8}"()' (in function: return_type_is_too_big_pointer) -; ERR: remark: :0:0: unable to translate instruction: call: ' %reg = call i8 addrspace(3)* asm sideeffect "; def $0", "={v[8:9]}"()' (in function: return_type_is_too_small_pointer) +; ERR: remark: :0:0: unable to translate instruction: call: ' %reg = call ptr addrspace(1) asm sideeffect "; def $0", "={v8}"()' (in function: return_type_is_too_big_pointer) +; ERR: remark: :0:0: unable to translate instruction: call: ' %reg = call ptr addrspace(3) asm sideeffect "; def $0", "={v[8:9]}"()' (in function: return_type_is_too_small_pointer) ; ERR: remark: :0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v[0:9]}"(<8 x i32> %arg)' (in function: use_vector_too_big) ; ERR: remark: :0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v0}"(i64 %arg)' (in function: use_scalar_too_small) -; ERR: remark: :0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v0}"(i8 addrspace(1)* %arg)' (in function: use_pointer_too_small) -; ERR: remark: :0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v[0:1]}"(i32 addrspace(3)* %arg)' (in function: use_pointer_too_big) +; ERR: remark: :0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v0}"(ptr addrspace(1) %arg)' (in function: use_pointer_too_small) +; ERR: remark: :0:0: unable to translate instruction: call: ' call void asm sideeffect "; use $0", "{v[0:1]}"(ptr addrspace(3) %arg)' (in function: use_pointer_too_big) ; This asm is broken because it's using a 5 element wide physical @@ -59,7 +59,7 @@ define i32 @return_type_is_too_small_scalar() { ret i32 %reg } -define i8 addrspace(1)* @return_type_is_too_big_pointer() { +define ptr addrspace(1) @return_type_is_too_big_pointer() { ; CHECK-LABEL: name: return_type_is_too_big_pointer ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -67,11 +67,11 @@ define i8 addrspace(1)* @return_type_is_too_big_pointer() { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8 - %reg = call i8 addrspace(1)* asm sideeffect "; def $0", "={v8}" () - ret i8 addrspace(1)* %reg + %reg = call ptr addrspace(1) asm sideeffect "; def $0", "={v8}" () + ret ptr addrspace(1) %reg } -define i8 addrspace(3)* @return_type_is_too_small_pointer() { +define ptr addrspace(3) @return_type_is_too_small_pointer() { ; CHECK-LABEL: name: return_type_is_too_small_pointer ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -79,8 +79,8 @@ define i8 addrspace(3)* @return_type_is_too_small_pointer() { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): ; CHECK-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8_vgpr9 - %reg = call i8 addrspace(3)* asm sideeffect "; def $0", "={v[8:9]}" () - ret i8 addrspace(3)* %reg + %reg = call ptr addrspace(3) asm sideeffect "; def $0", "={v[8:9]}" () + ret ptr addrspace(3) %reg } define void 
@use_vector_too_small(<8 x i32> %arg) { @@ -154,7 +154,7 @@ define void @use_scalar_too_big(i32 %arg) { ret void } -define void @use_pointer_too_small(i8 addrspace(1)* %arg) { +define void @use_pointer_too_small(ptr addrspace(1) %arg) { ; CHECK-LABEL: name: use_pointer_too_small ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -165,11 +165,11 @@ define void @use_pointer_too_small(i8 addrspace(1)* %arg) { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): - call void asm sideeffect "; use $0", "{v0}"(i8 addrspace(1)* %arg) + call void asm sideeffect "; use $0", "{v0}"(ptr addrspace(1) %arg) ret void } -define void @use_pointer_too_big(i32 addrspace(3)* %arg) { +define void @use_pointer_too_big(ptr addrspace(3) %arg) { ; CHECK-LABEL: name: use_pointer_too_big ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) @@ -178,6 +178,6 @@ define void @use_pointer_too_big(i32 addrspace(3)* %arg) { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1 (%ir-block.0): - call void asm sideeffect "; use $0", "{v[0:1]}"(i32 addrspace(3)* %arg) + call void asm sideeffect "; use $0", "{v[0:1]}"(ptr addrspace(3) %arg) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll index ecff0fb2b1ceb..8ec842021e176 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement-stack-lower.ll @@ -4,7 +4,7 @@ ; Check lowering of some large insertelement that use the stack ; instead of register indexing. -define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out.ptr, <64 x i32> addrspace(1)* %ptr, i32 %val, i32 %idx) #0 { +define amdgpu_kernel void @v_insert_v64i32_varidx(ptr addrspace(1) %out.ptr, ptr addrspace(1) %ptr, i32 %val, i32 %idx) #0 { ; GCN-LABEL: v_insert_v64i32_varidx: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[20:23], s[4:5], 0x0 @@ -265,9 +265,9 @@ define amdgpu_kernel void @v_insert_v64i32_varidx(<64 x i32> addrspace(1)* %out. 
; GCN-NEXT: global_store_dwordx4 v0, v[56:59], s[20:21] offset:224 ; GCN-NEXT: global_store_dwordx4 v0, v[60:63], s[20:21] offset:240 ; GCN-NEXT: s_endpgm - %vec = load <64 x i32>, <64 x i32> addrspace(1)* %ptr + %vec = load <64 x i32>, ptr addrspace(1) %ptr %insert = insertelement <64 x i32> %vec, i32 %val, i32 %idx - store <64 x i32> %insert, <64 x i32> addrspace(1)* %out.ptr + store <64 x i32> %insert, ptr addrspace(1) %out.ptr ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll index 922fdc9dd4fdb..5f2e1e9d509c9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s -define amdgpu_ps void @insertelement_s_v2i16_s_s(<2 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v2i16_s_s(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v2i16_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -91,13 +91,13 @@ define amdgpu_ps void @insertelement_s_v2i16_s_s(<2 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(4)* %ptr + %vec = load <2 x i16>, ptr addrspace(4) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i16_s_s(<2 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v2i16_s_s(ptr addrspace(1) %ptr, i16 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v2i16_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v2, v[0:1], off @@ -181,13 +181,13 @@ define amdgpu_ps void @insertelement_v_v2i16_s_s(<2 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(1 )* %ptr + %vec = load <2 x i16>, ptr addrspace(1 ) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v2i16_v_s(<2 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v2i16_v_s(ptr addrspace(4) inreg %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v2i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -267,13 +267,13 @@ define amdgpu_ps void @insertelement_s_v2i16_v_s(<2 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(4)* %ptr + %vec = load <2 x i16>, ptr addrspace(4) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v2i16_s_v(<2 x 
i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v2i16_s_v(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v2i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -360,13 +360,13 @@ define amdgpu_ps void @insertelement_s_v2i16_s_v(<2 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(4)* %ptr + %vec = load <2 x i16>, ptr addrspace(4) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v2i16_v_v(<2 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v2i16_v_v(ptr addrspace(4) inreg %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v2i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -450,13 +450,13 @@ define amdgpu_ps void @insertelement_s_v2i16_v_v(<2 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(4)* %ptr + %vec = load <2 x i16>, ptr addrspace(4) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i16_s_v(<2 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v2i16_s_v(ptr addrspace(1) %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v2i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v3, v[0:1], off @@ -543,13 +543,13 @@ define amdgpu_ps void @insertelement_v_v2i16_s_v(<2 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(1)* %ptr + %vec = load <2 x i16>, ptr addrspace(1) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i16_v_s(<2 x i16> addrspace(1)* %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v2i16_v_s(ptr addrspace(1) %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v2i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v3, v[0:1], off @@ -632,13 +632,13 @@ define amdgpu_ps void @insertelement_v_v2i16_v_s(<2 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(1)* %ptr + %vec = load <2 x i16>, ptr addrspace(1) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i16_v_v(<2 x i16> addrspace(1)* %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v2i16_v_v(ptr addrspace(1) %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v2i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v4, v[0:1], off @@ -722,70 +722,70 @@ define amdgpu_ps void 
@insertelement_v_v2i16_v_v(<2 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i16>, <2 x i16> addrspace(1)* %ptr + %vec = load <2 x i16>, ptr addrspace(1) %ptr %insert = insertelement <2 x i16> %vec, i16 %val, i32 %idx - store <2 x i16> %insert, <2 x i16> addrspace(1)* null + store <2 x i16> %insert, ptr addrspace(1) null ret void } ; FIXME: 3 element load/store legalization -; define amdgpu_ps void @insertelement_s_v3i16_s_s(<3 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 inreg %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i16_s_s(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 inreg %idx) { +; %vec = load <3 x i16>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_v_v3i16_s_s(<3 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 inreg %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(1 )* %ptr +; define amdgpu_ps void @insertelement_v_v3i16_s_s(ptr addrspace(1) %ptr, i16 inreg %val, i32 inreg %idx) { +; %vec = load <3 x i16>, ptr addrspace(1 ) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_s_v3i16_v_s(<3 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 inreg %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i16_v_s(ptr addrspace(4) inreg %ptr, i16 %val, i32 inreg %idx) { +; %vec = load <3 x i16>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_s_v3i16_s_v(<3 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i16_s_v(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 %idx) { +; %vec = load <3 x i16>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_s_v3i16_v_v(<3 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i16_v_v(ptr addrspace(4) inreg %ptr, i16 %val, i32 %idx) { +; %vec = load <3 x i16>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_v_v3i16_s_v(<3 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(1)* %ptr +; define amdgpu_ps void @insertelement_v_v3i16_s_v(ptr addrspace(1) %ptr, i16 inreg %val, i32 %idx) { +; %vec = load <3 x i16>, ptr addrspace(1) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } 
-; define amdgpu_ps void @insertelement_v_v3i16_v_s(<3 x i16> addrspace(1)* %ptr, i16 %val, i32 inreg %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(1)* %ptr +; define amdgpu_ps void @insertelement_v_v3i16_v_s(ptr addrspace(1) %ptr, i16 %val, i32 inreg %idx) { +; %vec = load <3 x i16>, ptr addrspace(1) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_v_v3i16_v_v(<3 x i16> addrspace(1)* %ptr, i16 %val, i32 %idx) { -; %vec = load <3 x i16>, <3 x i16> addrspace(1)* %ptr +; define amdgpu_ps void @insertelement_v_v3i16_v_v(ptr addrspace(1) %ptr, i16 %val, i32 %idx) { +; %vec = load <3 x i16>, ptr addrspace(1) %ptr ; %insert = insertelement <3 x i16> %vec, i16 %val, i32 %idx -; store <3 x i16> %insert, <3 x i16> addrspace(1)* null +; store <3 x i16> %insert, ptr addrspace(1) null ; ret void ; } -define amdgpu_ps void @insertelement_v_v4i16_s_s(<4 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v4i16_s_s(ptr addrspace(1) %ptr, i16 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v4i16_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -900,13 +900,13 @@ define amdgpu_ps void @insertelement_v_v4i16_s_s(<4 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i16>, <4 x i16> addrspace(1 )* %ptr + %vec = load <4 x i16>, ptr addrspace(1 ) %ptr %insert = insertelement <4 x i16> %vec, i16 %val, i32 %idx - store <4 x i16> %insert, <4 x i16> addrspace(1)* null + store <4 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v4i16_v_s(<4 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v4i16_v_s(ptr addrspace(4) inreg %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v4i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -1031,13 +1031,13 @@ define amdgpu_ps void @insertelement_s_v4i16_v_s(<4 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vec = load <4 x i16>, ptr addrspace(4) %ptr %insert = insertelement <4 x i16> %vec, i16 %val, i32 %idx - store <4 x i16> %insert, <4 x i16> addrspace(1)* null + store <4 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v4i16_s_v(<4 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v4i16_s_v(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v4i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -1171,13 +1171,13 @@ define amdgpu_ps void @insertelement_s_v4i16_s_v(<4 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vec = load <4 x i16>, ptr addrspace(4) %ptr %insert = insertelement <4 x i16> %vec, i16 %val, i32 %idx - store <4 x i16> %insert, <4 x i16> addrspace(1)* null + store <4 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v4i16_v_v(<4 x i16> 
addrspace(4)* inreg %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v4i16_v_v(ptr addrspace(4) inreg %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v4i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -1309,13 +1309,13 @@ define amdgpu_ps void @insertelement_s_v4i16_v_v(<4 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i16>, <4 x i16> addrspace(4)* %ptr + %vec = load <4 x i16>, ptr addrspace(4) %ptr %insert = insertelement <4 x i16> %vec, i16 %val, i32 %idx - store <4 x i16> %insert, <4 x i16> addrspace(1)* null + store <4 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v4i16_s_v(<4 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v4i16_s_v(ptr addrspace(1) %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v4i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -1433,13 +1433,13 @@ define amdgpu_ps void @insertelement_v_v4i16_s_v(<4 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vec = load <4 x i16>, ptr addrspace(1) %ptr %insert = insertelement <4 x i16> %vec, i16 %val, i32 %idx - store <4 x i16> %insert, <4 x i16> addrspace(1)* null + store <4 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v4i16_v_s(<4 x i16> addrspace(1)* %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v4i16_v_s(ptr addrspace(1) %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v4i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -1552,13 +1552,13 @@ define amdgpu_ps void @insertelement_v_v4i16_v_s(<4 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vec = load <4 x i16>, ptr addrspace(1) %ptr %insert = insertelement <4 x i16> %vec, i16 %val, i32 %idx - store <4 x i16> %insert, <4 x i16> addrspace(1)* null + store <4 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v4i16_v_v(<4 x i16> addrspace(1)* %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v4i16_v_v(ptr addrspace(1) %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v4i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -1674,13 +1674,13 @@ define amdgpu_ps void @insertelement_v_v4i16_v_v(<4 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i16>, <4 x i16> addrspace(1)* %ptr + %vec = load <4 x i16>, ptr addrspace(1) %ptr %insert = insertelement <4 x i16> %vec, i16 %val, i32 %idx - store <4 x i16> %insert, <4 x i16> addrspace(1)* null + store <4 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i16_s_s(<8 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v8i16_s_s(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v8i16_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], 
s[2:3], 0x0 @@ -1857,13 +1857,13 @@ define amdgpu_ps void @insertelement_s_v8i16_s_s(<8 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vec = load <8 x i16>, ptr addrspace(4) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i16_s_s(<8 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v8i16_s_s(ptr addrspace(1) %ptr, i16 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v8i16_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off @@ -2012,13 +2012,13 @@ define amdgpu_ps void @insertelement_v_v8i16_s_s(<8 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(1 )* %ptr + %vec = load <8 x i16>, ptr addrspace(1 ) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i16_v_s(<8 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v8i16_v_s(ptr addrspace(4) inreg %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v8i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -2194,13 +2194,13 @@ define amdgpu_ps void @insertelement_s_v8i16_v_s(<8 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vec = load <8 x i16>, ptr addrspace(4) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i16_s_v(<8 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v8i16_s_v(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v8i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0 @@ -2381,13 +2381,13 @@ define amdgpu_ps void @insertelement_s_v8i16_s_v(<8 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vec = load <8 x i16>, ptr addrspace(4) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i16_v_v(<8 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v8i16_v_v(ptr addrspace(4) inreg %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v8i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 @@ -2568,13 +2568,13 @@ define amdgpu_ps void @insertelement_s_v8i16_v_v(<8 x i16> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; 
GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(4)* %ptr + %vec = load <8 x i16>, ptr addrspace(4) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i16_s_v(<8 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v8i16_s_v(ptr addrspace(1) %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v8i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off @@ -2724,13 +2724,13 @@ define amdgpu_ps void @insertelement_v_v8i16_s_v(<8 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[7:8], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vec = load <8 x i16>, ptr addrspace(1) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i16_v_s(<8 x i16> addrspace(1)* %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v8i16_v_s(ptr addrspace(1) %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v8i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off @@ -2876,13 +2876,13 @@ define amdgpu_ps void @insertelement_v_v8i16_v_s(<8 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[7:8], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vec = load <8 x i16>, ptr addrspace(1) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i16_v_v(<8 x i16> addrspace(1)* %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v8i16_v_v(ptr addrspace(1) %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v8i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off @@ -3029,13 +3029,13 @@ define amdgpu_ps void @insertelement_v_v8i16_v_v(<8 x i16> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[8:9], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i16>, <8 x i16> addrspace(1)* %ptr + %vec = load <8 x i16>, ptr addrspace(1) %ptr %insert = insertelement <8 x i16> %vec, i16 %val, i32 %idx - store <8 x i16> %insert, <8 x i16> addrspace(1)* null + store <8 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i16_s_s(<16 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v16i16_s_s(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v16i16_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[8:15], s[2:3], 0x0 @@ -3212,13 +3212,13 @@ define amdgpu_ps void @insertelement_s_v16i16_s_s(<16 x i16> addrspace(4)* inreg ; GFX11-NEXT: global_store_b128 v[10:11], v[4:7], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(4)* %ptr + %vec = load <16 x i16>, ptr addrspace(4) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> 
addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i16_s_s(<16 x i16> addrspace(1)* %ptr, i16 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v16i16_s_s(ptr addrspace(1) %ptr, i16 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v16i16_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off @@ -3366,13 +3366,13 @@ define amdgpu_ps void @insertelement_v_v16i16_s_s(<16 x i16> addrspace(1)* %ptr, ; GFX11-NEXT: global_store_b128 v[10:11], v[6:9], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(1 )* %ptr + %vec = load <16 x i16>, ptr addrspace(1 ) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i16_v_s(<16 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v16i16_v_s(ptr addrspace(4) inreg %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v16i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[8:15], s[2:3], 0x0 @@ -3547,13 +3547,13 @@ define amdgpu_ps void @insertelement_s_v16i16_v_s(<16 x i16> addrspace(4)* inreg ; GFX11-NEXT: global_store_b128 v[10:11], v[4:7], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(4)* %ptr + %vec = load <16 x i16>, ptr addrspace(4) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i16_s_v(<16 x i16> addrspace(4)* inreg %ptr, i16 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v16i16_s_v(ptr addrspace(4) inreg %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v16i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[16:23], s[2:3], 0x0 @@ -3841,13 +3841,13 @@ define amdgpu_ps void @insertelement_s_v16i16_s_v(<16 x i16> addrspace(4)* inreg ; GFX11-NEXT: global_store_b128 v[10:11], v[4:7], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(4)* %ptr + %vec = load <16 x i16>, ptr addrspace(4) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i16_v_v(<16 x i16> addrspace(4)* inreg %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v16i16_v_v(ptr addrspace(4) inreg %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v16i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx8 s[12:19], s[2:3], 0x0 @@ -4135,13 +4135,13 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(<16 x i16> addrspace(4)* inreg ; GFX11-NEXT: global_store_b128 v[10:11], v[4:7], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(4)* %ptr + %vec = load <16 x i16>, ptr addrspace(4) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i16_s_v(<16 x i16> addrspace(1)* %ptr, i16 
inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v16i16_s_v(ptr addrspace(1) %ptr, i16 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v16i16_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off @@ -4381,13 +4381,13 @@ define amdgpu_ps void @insertelement_v_v16i16_s_v(<16 x i16> addrspace(1)* %ptr, ; GFX11-NEXT: global_store_b128 v[13:14], v[4:7], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(1)* %ptr + %vec = load <16 x i16>, ptr addrspace(1) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i16_v_s(<16 x i16> addrspace(1)* %ptr, i16 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v16i16_v_s(ptr addrspace(1) %ptr, i16 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v16i16_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off @@ -4532,13 +4532,13 @@ define amdgpu_ps void @insertelement_v_v16i16_v_s(<16 x i16> addrspace(1)* %ptr, ; GFX11-NEXT: global_store_b128 v[11:12], v[7:10], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(1)* %ptr + %vec = load <16 x i16>, ptr addrspace(1) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i16_v_v(<16 x i16> addrspace(1)* %ptr, i16 %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v16i16_v_v(ptr addrspace(1) %ptr, i16 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v16i16_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off @@ -4775,8 +4775,8 @@ define amdgpu_ps void @insertelement_v_v16i16_v_v(<16 x i16> addrspace(1)* %ptr, ; GFX11-NEXT: global_store_b128 v[14:15], v[4:7], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i16>, <16 x i16> addrspace(1)* %ptr + %vec = load <16 x i16>, ptr addrspace(1) %ptr %insert = insertelement <16 x i16> %vec, i16 %val, i32 %idx - store <16 x i16> %insert, <16 x i16> addrspace(1)* null + store <16 x i16> %insert, ptr addrspace(1) null ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll index d0885fa79e55d..bc06c38c984fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i8.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s -define amdgpu_ps void @insertelement_s_v2i8_s_s(<2 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v2i8_s_s(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v2i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v0, 0 @@ -107,13 +107,13 @@ define amdgpu_ps void @insertelement_s_v2i8_s_s(<2 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, 
<2 x i8> addrspace(4)* %ptr + %vec = load <2 x i8>, ptr addrspace(4) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i8_s_s(<2 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v2i8_s_s(ptr addrspace(1) %ptr, i8 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v2i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_ushort v0, v[0:1], off @@ -209,13 +209,13 @@ define amdgpu_ps void @insertelement_v_v2i8_s_s(<2 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, <2 x i8> addrspace(1 )* %ptr + %vec = load <2 x i8>, ptr addrspace(1 ) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v2i8_v_s(<2 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v2i8_v_s(ptr addrspace(4) inreg %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v2i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -313,13 +313,13 @@ define amdgpu_ps void @insertelement_s_v2i8_v_s(<2 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, <2 x i8> addrspace(4)* %ptr + %vec = load <2 x i8>, ptr addrspace(4) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v2i8_s_v(<2 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v2i8_s_v(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v2i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -421,13 +421,13 @@ define amdgpu_ps void @insertelement_s_v2i8_s_v(<2 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, <2 x i8> addrspace(4)* %ptr + %vec = load <2 x i8>, ptr addrspace(4) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v2i8_v_v(<2 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v2i8_v_v(ptr addrspace(4) inreg %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v2i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v2, 0 @@ -525,13 +525,13 @@ define amdgpu_ps void @insertelement_s_v2i8_v_v(<2 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, <2 x i8> addrspace(4)* %ptr + %vec = load <2 x i8>, ptr addrspace(4) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i8_s_v(<2 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 %idx) { +define 
amdgpu_ps void @insertelement_v_v2i8_s_v(ptr addrspace(1) %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v2i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_ushort v0, v[0:1], off @@ -627,13 +627,13 @@ define amdgpu_ps void @insertelement_v_v2i8_s_v(<2 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, <2 x i8> addrspace(1)* %ptr + %vec = load <2 x i8>, ptr addrspace(1) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i8_v_s(<2 x i8> addrspace(1)* %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v2i8_v_s(ptr addrspace(1) %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v2i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_ushort v0, v[0:1], off @@ -726,13 +726,13 @@ define amdgpu_ps void @insertelement_v_v2i8_v_s(<2 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, <2 x i8> addrspace(1)* %ptr + %vec = load <2 x i8>, ptr addrspace(1) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v2i8_v_v(<2 x i8> addrspace(1)* %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v2i8_v_v(ptr addrspace(1) %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v2i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_ushort v0, v[0:1], off @@ -825,70 +825,70 @@ define amdgpu_ps void @insertelement_v_v2i8_v_v(<2 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <2 x i8>, <2 x i8> addrspace(1)* %ptr + %vec = load <2 x i8>, ptr addrspace(1) %ptr %insert = insertelement <2 x i8> %vec, i8 %val, i32 %idx - store <2 x i8> %insert, <2 x i8> addrspace(1)* null + store <2 x i8> %insert, ptr addrspace(1) null ret void } ; FIXME: 3 element load/store legalization -; define amdgpu_ps void @insertelement_s_v3i8_s_s(<3 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 inreg %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i8_s_s(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 inreg %idx) { +; %vec = load <3 x i8>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_v_v3i8_s_s(<3 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 inreg %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(1 )* %ptr +; define amdgpu_ps void @insertelement_v_v3i8_s_s(ptr addrspace(1) %ptr, i8 inreg %val, i32 inreg %idx) { +; %vec = load <3 x i8>, ptr addrspace(1 ) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_s_v3i8_v_s(<3 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 inreg %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i8_v_s(ptr 
addrspace(4) inreg %ptr, i8 %val, i32 inreg %idx) { +; %vec = load <3 x i8>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_s_v3i8_s_v(<3 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i8_s_v(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 %idx) { +; %vec = load <3 x i8>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_s_v3i8_v_v(<3 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(4)* %ptr +; define amdgpu_ps void @insertelement_s_v3i8_v_v(ptr addrspace(4) inreg %ptr, i8 %val, i32 %idx) { +; %vec = load <3 x i8>, ptr addrspace(4) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_v_v3i8_s_v(<3 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(1)* %ptr +; define amdgpu_ps void @insertelement_v_v3i8_s_v(ptr addrspace(1) %ptr, i8 inreg %val, i32 %idx) { +; %vec = load <3 x i8>, ptr addrspace(1) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_v_v3i8_v_s(<3 x i8> addrspace(1)* %ptr, i8 %val, i32 inreg %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(1)* %ptr +; define amdgpu_ps void @insertelement_v_v3i8_v_s(ptr addrspace(1) %ptr, i8 %val, i32 inreg %idx) { +; %vec = load <3 x i8>, ptr addrspace(1) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -; define amdgpu_ps void @insertelement_v_v3i8_v_v(<3 x i8> addrspace(1)* %ptr, i8 %val, i32 %idx) { -; %vec = load <3 x i8>, <3 x i8> addrspace(1)* %ptr +; define amdgpu_ps void @insertelement_v_v3i8_v_v(ptr addrspace(1) %ptr, i8 %val, i32 %idx) { +; %vec = load <3 x i8>, ptr addrspace(1) %ptr ; %insert = insertelement <3 x i8> %vec, i8 %val, i32 %idx -; store <3 x i8> %insert, <3 x i8> addrspace(1)* null +; store <3 x i8> %insert, ptr addrspace(1) null ; ret void ; } -define amdgpu_ps void @insertelement_v_v4i8_s_s(<4 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v4i8_s_s(ptr addrspace(1) %ptr, i8 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v4i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v2, v[0:1], off @@ -974,13 +974,13 @@ define amdgpu_ps void @insertelement_v_v4i8_s_s(<4 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i8>, <4 x i8> addrspace(1 )* %ptr + %vec = load <4 x i8>, ptr addrspace(1 ) %ptr %insert = insertelement <4 x i8> %vec, i8 %val, i32 %idx - store <4 x i8> %insert, <4 x i8> addrspace(1)* null + store <4 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void 
@insertelement_s_v4i8_v_s(<4 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v4i8_v_s(ptr addrspace(4) inreg %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v4i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -1061,13 +1061,13 @@ define amdgpu_ps void @insertelement_s_v4i8_v_s(<4 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vec = load <4 x i8>, ptr addrspace(4) %ptr %insert = insertelement <4 x i8> %vec, i8 %val, i32 %idx - store <4 x i8> %insert, <4 x i8> addrspace(1)* null + store <4 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v4i8_s_v(<4 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v4i8_s_v(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v4i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -1155,13 +1155,13 @@ define amdgpu_ps void @insertelement_s_v4i8_s_v(<4 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vec = load <4 x i8>, ptr addrspace(4) %ptr %insert = insertelement <4 x i8> %vec, i8 %val, i32 %idx - store <4 x i8> %insert, <4 x i8> addrspace(1)* null + store <4 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v4i8_v_v(<4 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v4i8_v_v(ptr addrspace(4) inreg %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v4i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dword s0, s[2:3], 0x0 @@ -1246,13 +1246,13 @@ define amdgpu_ps void @insertelement_s_v4i8_v_v(<4 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i8>, <4 x i8> addrspace(4)* %ptr + %vec = load <4 x i8>, ptr addrspace(4) %ptr %insert = insertelement <4 x i8> %vec, i8 %val, i32 %idx - store <4 x i8> %insert, <4 x i8> addrspace(1)* null + store <4 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v4i8_s_v(<4 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v4i8_s_v(ptr addrspace(1) %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v4i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v3, v[0:1], off @@ -1341,13 +1341,13 @@ define amdgpu_ps void @insertelement_v_v4i8_s_v(<4 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vec = load <4 x i8>, ptr addrspace(1) %ptr %insert = insertelement <4 x i8> %vec, i8 %val, i32 %idx - store <4 x i8> %insert, <4 x i8> addrspace(1)* null + store <4 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v4i8_v_s(<4 x i8> addrspace(1)* %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v4i8_v_s(ptr addrspace(1) %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v4i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v3, v[0:1], off @@ -1432,13 +1432,13 @@ define amdgpu_ps void 
@insertelement_v_v4i8_v_s(<4 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vec = load <4 x i8>, ptr addrspace(1) %ptr %insert = insertelement <4 x i8> %vec, i8 %val, i32 %idx - store <4 x i8> %insert, <4 x i8> addrspace(1)* null + store <4 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v4i8_v_v(<4 x i8> addrspace(1)* %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v4i8_v_v(ptr addrspace(1) %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v4i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dword v4, v[0:1], off @@ -1524,13 +1524,13 @@ define amdgpu_ps void @insertelement_v_v4i8_v_v(<4 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b32 v[0:1], v2, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %vec = load <4 x i8>, ptr addrspace(1) %ptr %insert = insertelement <4 x i8> %vec, i8 %val, i32 %idx - store <4 x i8> %insert, <4 x i8> addrspace(1)* null + store <4 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i8_s_s(<8 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v8i8_s_s(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v8i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -1658,13 +1658,13 @@ define amdgpu_ps void @insertelement_s_v8i8_s_s(<8 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vec = load <8 x i8>, ptr addrspace(4) %ptr %insert = insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i8_s_s(<8 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v8i8_s_s(ptr addrspace(1) %ptr, i8 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v8i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -1781,13 +1781,13 @@ define amdgpu_ps void @insertelement_v_v8i8_s_s(<8 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(1 )* %ptr + %vec = load <8 x i8>, ptr addrspace(1 ) %ptr %insert = insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i8_v_s(<8 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v8i8_v_s(ptr addrspace(4) inreg %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v8i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -1913,13 +1913,13 @@ define amdgpu_ps void @insertelement_s_v8i8_v_s(<8 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vec = load <8 x i8>, ptr addrspace(4) %ptr %insert = 
insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i8_s_v(<8 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v8i8_s_v(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v8i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -2054,13 +2054,13 @@ define amdgpu_ps void @insertelement_s_v8i8_s_v(<8 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vec = load <8 x i8>, ptr addrspace(4) %ptr %insert = insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v8i8_v_v(<8 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v8i8_v_v(ptr addrspace(4) inreg %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v8i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 @@ -2193,13 +2193,13 @@ define amdgpu_ps void @insertelement_s_v8i8_v_v(<8 x i8> addrspace(4)* inreg %pt ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(4)* %ptr + %vec = load <8 x i8>, ptr addrspace(4) %ptr %insert = insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i8_s_v(<8 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v8i8_s_v(ptr addrspace(1) %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v8i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -2319,13 +2319,13 @@ define amdgpu_ps void @insertelement_v_v8i8_s_v(<8 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vec = load <8 x i8>, ptr addrspace(1) %ptr %insert = insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i8_v_s(<8 x i8> addrspace(1)* %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v8i8_v_s(ptr addrspace(1) %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v8i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -2440,13 +2440,13 @@ define amdgpu_ps void @insertelement_v_v8i8_v_s(<8 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vec = load <8 x i8>, ptr addrspace(1) %ptr %insert = insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v8i8_v_v(<8 x i8> addrspace(1)* %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void 
@insertelement_v_v8i8_v_v(ptr addrspace(1) %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v8i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx2 v[0:1], v[0:1], off @@ -2564,13 +2564,13 @@ define amdgpu_ps void @insertelement_v_v8i8_v_v(<8 x i8> addrspace(1)* %ptr, i8 ; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %vec = load <8 x i8>, ptr addrspace(1) %ptr %insert = insertelement <8 x i8> %vec, i8 %val, i32 %idx - store <8 x i8> %insert, <8 x i8> addrspace(1)* null + store <8 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i8_s_s(<16 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v16i8_s_s(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v16i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -2747,13 +2747,13 @@ define amdgpu_ps void @insertelement_s_v16i8_s_s(<16 x i8> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(4)* %ptr + %vec = load <16 x i8>, ptr addrspace(4) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i8_s_s(<16 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v16i8_s_s(ptr addrspace(1) %ptr, i8 inreg %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v16i8_s_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[0:3], v[0:1], off @@ -2902,13 +2902,13 @@ define amdgpu_ps void @insertelement_v_v16i8_s_s(<16 x i8> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(1 )* %ptr + %vec = load <16 x i8>, ptr addrspace(1 ) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i8_v_s(<16 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_s_v16i8_v_s(ptr addrspace(4) inreg %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_s_v16i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0 @@ -3084,13 +3084,13 @@ define amdgpu_ps void @insertelement_s_v16i8_v_s(<16 x i8> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(4)* %ptr + %vec = load <16 x i8>, ptr addrspace(4) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i8_s_v(<16 x i8> addrspace(4)* inreg %ptr, i8 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v16i8_s_v(ptr addrspace(4) inreg %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v16i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0 @@ -3271,13 +3271,13 @@ define 
amdgpu_ps void @insertelement_s_v16i8_s_v(<16 x i8> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(4)* %ptr + %vec = load <16 x i8>, ptr addrspace(4) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_s_v16i8_v_v(<16 x i8> addrspace(4)* inreg %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void @insertelement_s_v16i8_v_v(ptr addrspace(4) inreg %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_s_v16i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 @@ -3458,13 +3458,13 @@ define amdgpu_ps void @insertelement_s_v16i8_v_v(<16 x i8> addrspace(4)* inreg % ; GFX11-NEXT: global_store_b128 v[4:5], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(4)* %ptr + %vec = load <16 x i8>, ptr addrspace(4) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i8_s_v(<16 x i8> addrspace(1)* %ptr, i8 inreg %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v16i8_s_v(ptr addrspace(1) %ptr, i8 inreg %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v16i8_s_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off @@ -3614,13 +3614,13 @@ define amdgpu_ps void @insertelement_v_v16i8_s_v(<16 x i8> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[7:8], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vec = load <16 x i8>, ptr addrspace(1) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i8_v_s(<16 x i8> addrspace(1)* %ptr, i8 %val, i32 inreg %idx) { +define amdgpu_ps void @insertelement_v_v16i8_v_s(ptr addrspace(1) %ptr, i8 %val, i32 inreg %idx) { ; GFX9-LABEL: insertelement_v_v16i8_v_s: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[3:6], v[0:1], off @@ -3766,13 +3766,13 @@ define amdgpu_ps void @insertelement_v_v16i8_v_s(<16 x i8> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[7:8], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vec = load <16 x i8>, ptr addrspace(1) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } -define amdgpu_ps void @insertelement_v_v16i8_v_v(<16 x i8> addrspace(1)* %ptr, i8 %val, i32 %idx) { +define amdgpu_ps void @insertelement_v_v16i8_v_v(ptr addrspace(1) %ptr, i8 %val, i32 %idx) { ; GFX9-LABEL: insertelement_v_v16i8_v_v: ; GFX9: ; %bb.0: ; GFX9-NEXT: global_load_dwordx4 v[4:7], v[0:1], off @@ -3919,8 +3919,8 @@ define amdgpu_ps void @insertelement_v_v16i8_v_v(<16 x i8> addrspace(1)* %ptr, i ; GFX11-NEXT: global_store_b128 v[8:9], v[0:3], off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %vec = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %vec = load <16 x 
i8>, ptr addrspace(1) %ptr %insert = insertelement <16 x i8> %vec, i8 %val, i32 %idx - store <16 x i8> %insert, <16 x i8> addrspace(1)* null + store <16 x i8> %insert, ptr addrspace(1) null ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll index c81ffc4790106..714bb5687ab88 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s -define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, <64 x i32> addrspace(1)* %ptr.out) #0 { +define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addrspace(1) %ptr.out) #0 { ; GCN-LABEL: v_insert_v64i32_37: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -152,11 +152,11 @@ define amdgpu_kernel void @v_insert_v64i32_37(<64 x i32> addrspace(1)* %ptr.in, ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.in = getelementptr <64 x i32>, <64 x i32> addrspace(1)* %ptr.in, i32 %id - %vec = load <64 x i32>, <64 x i32> addrspace(1)* %gep.in + %gep.in = getelementptr <64 x i32>, ptr addrspace(1) %ptr.in, i32 %id + %vec = load <64 x i32>, ptr addrspace(1) %gep.in %insert = insertelement <64 x i32> %vec, i32 999, i32 37 - %gep.out = getelementptr <64 x i32>, <64 x i32> addrspace(1)* %ptr.out, i32 %id - store <64 x i32> %insert, <64 x i32> addrspace(1)* %gep.out + %gep.out = getelementptr <64 x i32>, ptr addrspace(1) %ptr.out, i32 %id + store <64 x i32> %insert, ptr addrspace(1) %gep.out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll index f4b821ca602c7..b668c42e99fe2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.ll @@ -46,7 +46,7 @@ entry: ret <8 x i32> %insert } -define amdgpu_ps <8 x i8 addrspace(3)*> @dyn_insertelement_v8p3i8_s_s_s(<8 x i8 addrspace(3)*> inreg %vec, i8 addrspace(3)* inreg %val, i32 inreg %idx) { +define amdgpu_ps <8 x ptr addrspace(3)> @dyn_insertelement_v8p3i8_s_s_s(<8 x ptr addrspace(3)> inreg %vec, ptr addrspace(3) inreg %val, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_insertelement_v8p3i8_s_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_cmp_eq_u32 s11, 0 @@ -81,8 +81,8 @@ define amdgpu_ps <8 x i8 addrspace(3)*> @dyn_insertelement_v8p3i8_s_s_s(<8 x i8 ; GFX10PLUS-NEXT: s_movreld_b32 s0, s10 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: - %insert = insertelement <8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx - ret <8 x i8 addrspace(3)*> %insert + %insert = insertelement <8 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 %idx + ret <8 x ptr addrspace(3)> %insert } define <8 x float> @dyn_insertelement_v8f32_const_s_v_v(float %val, i32 %idx) { @@ -624,7 +624,7 @@ entry: ret <8 x float> %insert } -define amdgpu_ps <8 x float> @dyn_insertelement_v8p3i8_v_v_s(<8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 inreg %idx) { +define amdgpu_ps <8 x float> @dyn_insertelement_v8p3i8_v_v_s(<8 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 inreg %idx) { ; GPRIDX-LABEL: 
dyn_insertelement_v8p3i8_v_v_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 @@ -651,8 +651,8 @@ define amdgpu_ps <8 x float> @dyn_insertelement_v8p3i8_v_v_s(<8 x i8 addrspace(3 ; GFX10PLUS-NEXT: v_movreld_b32_e32 v0, v8 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: - %insert = insertelement <8 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx - %cast.0 = ptrtoint <8 x i8 addrspace(3)*> %insert to <8 x i32> + %insert = insertelement <8 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 %idx + %cast.0 = ptrtoint <8 x ptr addrspace(3)> %insert to <8 x i32> %cast.1 = bitcast <8 x i32> %cast.0 to <8 x float> ret <8 x float> %cast.1 } @@ -752,7 +752,7 @@ entry: ret <8 x i64> %insert } -define amdgpu_ps <8 x i8 addrspace(1)*> @dyn_insertelement_v8p1i8_s_s_s(<8 x i8 addrspace(1)*> inreg %vec, i8 addrspace(1)* inreg %val, i32 inreg %idx) { +define amdgpu_ps <8 x ptr addrspace(1)> @dyn_insertelement_v8p1i8_s_s_s(<8 x ptr addrspace(1)> inreg %vec, ptr addrspace(1) inreg %val, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_insertelement_v8p1i8_s_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_mov_b32 s0, s2 @@ -798,8 +798,8 @@ define amdgpu_ps <8 x i8 addrspace(1)*> @dyn_insertelement_v8p1i8_s_s_s(<8 x i8 ; GFX10PLUS-NEXT: s_movreld_b64 s[0:1], s[18:19] ; GFX10PLUS-NEXT: ; return to shader part epilog entry: - %insert = insertelement <8 x i8 addrspace(1)*> %vec, i8 addrspace(1)* %val, i32 %idx - ret <8 x i8 addrspace(1)*> %insert + %insert = insertelement <8 x ptr addrspace(1)> %vec, ptr addrspace(1) %val, i32 %idx + ret <8 x ptr addrspace(1)> %insert } define void @dyn_insertelement_v8f64_const_s_v_v(double %val, i32 %idx) { @@ -996,10 +996,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -1208,10 +1208,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -1357,10 +1357,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x 
double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -1418,10 +1418,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -1624,10 +1624,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -1748,10 +1748,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -1809,10 +1809,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr 
addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -1930,10 +1930,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -2538,10 +2538,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -2664,10 +2664,10 @@ entry: %vec.1 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.2 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> %vec.3 = shufflevector <8 x double> %insert, <8 x double> undef, <2 x i32> - store volatile <2 x double> %vec.0, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.1, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.2, <2 x double> addrspace(1)* undef - store volatile <2 x double> %vec.3, <2 x double> addrspace(1)* undef + store volatile <2 x double> %vec.0, ptr addrspace(1) undef + store volatile <2 x double> %vec.1, ptr addrspace(1) undef + store volatile <2 x double> %vec.2, ptr addrspace(1) undef + store volatile <2 x double> %vec.3, ptr addrspace(1) undef ret void } @@ -4316,7 +4316,7 @@ entry: ret <7 x i32> %insert } -define amdgpu_ps <7 x i8 addrspace(3)*> @dyn_insertelement_v7p3i8_s_s_s(<7 x i8 addrspace(3)*> inreg %vec, i8 addrspace(3)* inreg %val, i32 inreg %idx) { +define amdgpu_ps <7 x ptr addrspace(3)> @dyn_insertelement_v7p3i8_s_s_s(<7 x ptr addrspace(3)> inreg %vec, ptr addrspace(3) inreg %val, i32 inreg %idx) { ; GPRIDX-LABEL: dyn_insertelement_v7p3i8_s_s_s: ; GPRIDX: ; %bb.0: ; %entry ; GPRIDX-NEXT: s_cmp_eq_u32 s10, 0 @@ -4353,8 +4353,8 @@ define amdgpu_ps <7 x i8 addrspace(3)*> @dyn_insertelement_v7p3i8_s_s_s(<7 x i8 ; GFX10PLUS-NEXT: s_cselect_b32 s6, s9, s8 ; GFX10PLUS-NEXT: ; return to shader part epilog entry: - %insert = insertelement <7 x i8 addrspace(3)*> %vec, i8 addrspace(3)* %val, i32 %idx - ret <7 x i8 addrspace(3)*> %insert + %insert = insertelement <7 x ptr addrspace(3)> %vec, ptr addrspace(3) %val, i32 %idx + ret <7 x ptr addrspace(3)> %insert } define amdgpu_ps <7 x float> @dyn_insertelement_v7f32_s_v_s(<7 x float> inreg %vec, float 
%val, i32 inreg %idx) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll index b858aeb433925..25f4478702e82 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll @@ -21,7 +21,7 @@ define amdgpu_kernel void @default_kernel() { ; HSA-NEXT: frameInfo: define amdgpu_kernel void @workgroup_id_x() { %id = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %id, i32 addrspace(1)* undef + store volatile i32 %id, ptr addrspace(1) undef ret void } @@ -35,7 +35,7 @@ define amdgpu_kernel void @workgroup_id_x() { ; HSA-NEXT: frameInfo: define amdgpu_kernel void @workgroup_id_y() { %id = call i32 @llvm.amdgcn.workgroup.id.y() - store volatile i32 %id, i32 addrspace(1)* undef + store volatile i32 %id, ptr addrspace(1) undef ret void } @@ -49,7 +49,7 @@ define amdgpu_kernel void @workgroup_id_y() { ; HSA-NEXT: frameInfo: define amdgpu_kernel void @workgroup_id_z() { %id = call i32 @llvm.amdgcn.workgroup.id.z() - store volatile i32 %id, i32 addrspace(1)* undef + store volatile i32 %id, ptr addrspace(1) undef ret void } @@ -63,9 +63,9 @@ define amdgpu_kernel void @workgroup_id_z() { ; HSA-NEXT: frameInfo: define amdgpu_kernel void @workgroup_id_xy() { %id0 = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %id0, i32 addrspace(1)* undef + store volatile i32 %id0, ptr addrspace(1) undef %id1 = call i32 @llvm.amdgcn.workgroup.id.y() - store volatile i32 %id1, i32 addrspace(1)* undef + store volatile i32 %id1, ptr addrspace(1) undef ret void } @@ -79,11 +79,11 @@ define amdgpu_kernel void @workgroup_id_xy() { ; HSA-NEXT: frameInfo: define amdgpu_kernel void @workgroup_id_xyz() { %id0 = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %id0, i32 addrspace(1)* undef + store volatile i32 %id0, ptr addrspace(1) undef %id1 = call i32 @llvm.amdgcn.workgroup.id.y() - store volatile i32 %id1, i32 addrspace(1)* undef + store volatile i32 %id1, ptr addrspace(1) undef %id2 = call i32 @llvm.amdgcn.workgroup.id.y() - store volatile i32 %id2, i32 addrspace(1)* undef + store volatile i32 %id2, ptr addrspace(1) undef ret void } @@ -97,9 +97,9 @@ define amdgpu_kernel void @workgroup_id_xyz() { ; HSA-NEXT: frameInfo: define amdgpu_kernel void @workgroup_id_yz() { %id0 = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %id0, i32 addrspace(1)* undef + store volatile i32 %id0, ptr addrspace(1) undef %id1 = call i32 @llvm.amdgcn.workgroup.id.y() - store volatile i32 %id1, i32 addrspace(1)* undef + store volatile i32 %id1, ptr addrspace(1) undef ret void } @@ -113,9 +113,9 @@ define amdgpu_kernel void @workgroup_id_yz() { ; HSA-NEXT: frameInfo: define amdgpu_kernel void @workgroup_id_xz() { %id0 = call i32 @llvm.amdgcn.workgroup.id.x() - store volatile i32 %id0, i32 addrspace(1)* undef + store volatile i32 %id0, ptr addrspace(1) undef %id1 = call i32 @llvm.amdgcn.workgroup.id.z() - store volatile i32 %id1, i32 addrspace(1)* undef + store volatile i32 %id1, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll index 41ac2b984ef10..3ce609676b8bf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel.ll @@ -2,7 +2,7 @@ ; RUN: llc 
-global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSA-VI %s ; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=fiji -O0 -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s -define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind { +define amdgpu_kernel void @i8_arg(ptr addrspace(1) nocapture %out, i8 %in) nounwind { ; HSA-VI-LABEL: name: i8_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -32,11 +32,11 @@ define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) noun ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i8 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind { +define amdgpu_kernel void @i8_zext_arg(ptr addrspace(1) nocapture %out, i8 zeroext %in) nounwind { ; HSA-VI-LABEL: name: i8_zext_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -66,11 +66,11 @@ define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zero ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i8 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind { +define amdgpu_kernel void @i8_sext_arg(ptr addrspace(1) nocapture %out, i8 signext %in) nounwind { ; HSA-VI-LABEL: name: i8_sext_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -100,11 +100,11 @@ define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 sign ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = sext i8 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind { +define amdgpu_kernel void @i16_arg(ptr addrspace(1) nocapture %out, i16 %in) nounwind { ; HSA-VI-LABEL: name: i16_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -134,11 +134,11 @@ define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) no ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i16 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind { +define amdgpu_kernel void @i16_zext_arg(ptr addrspace(1) nocapture %out, i16 zeroext %in) nounwind { ; HSA-VI-LABEL: name: i16_zext_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -168,11 +168,11 @@ define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 ze ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; 
LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i16 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind { +define amdgpu_kernel void @i16_sext_arg(ptr addrspace(1) nocapture %out, i16 signext %in) nounwind { ; HSA-VI-LABEL: name: i16_sext_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -202,11 +202,11 @@ define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 si ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = sext i16 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind { +define amdgpu_kernel void @i32_arg(ptr addrspace(1) nocapture %out, i32 %in) nounwind { ; HSA-VI-LABEL: name: i32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -234,11 +234,11 @@ define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) no ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store i32 %in, i32 addrspace(1)* %out, align 4 + store i32 %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind { +define amdgpu_kernel void @f32_arg(ptr addrspace(1) nocapture %out, float %in) nounwind { ; HSA-VI-LABEL: name: f32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -266,11 +266,11 @@ define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store float %in, float addrspace(1)* %out, align 4 + store float %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { +define amdgpu_kernel void @v2i8_arg(ptr addrspace(1) %out, <2 x i8> %in) { ; HSA-VI-LABEL: name: v2i8_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -298,11 +298,11 @@ define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) { ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <2 x i8> %in, <2 x i8> addrspace(1)* %out + store <2 x i8> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) { +define amdgpu_kernel void @v2i16_arg(ptr addrspace(1) %out, <2 x i16> %in) { ; HSA-VI-LABEL: name: v2i16_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -330,11 +330,11 @@ define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <2 x i16> %in, <2 x i16> addrspace(1)* %out + store <2 x i16> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind { +define amdgpu_kernel void @v2i32_arg(ptr addrspace(1) nocapture %out, <2 x i32> %in) nounwind { ; 
HSA-VI-LABEL: name: v2i32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -362,11 +362,11 @@ define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4 + store <2 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind { +define amdgpu_kernel void @v2f32_arg(ptr addrspace(1) nocapture %out, <2 x float> %in) nounwind { ; HSA-VI-LABEL: name: v2f32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -394,11 +394,11 @@ define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4 + store <2 x float> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind { +define amdgpu_kernel void @v3i8_arg(ptr addrspace(1) nocapture %out, <3 x i8> %in) nounwind { ; HSA-VI-LABEL: name: v3i8_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -426,11 +426,11 @@ define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4 + store <3 x i8> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind { +define amdgpu_kernel void @v3i16_arg(ptr addrspace(1) nocapture %out, <3 x i16> %in) nounwind { ; HSA-VI-LABEL: name: v3i16_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -458,11 +458,11 @@ define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4 + store <3 x i16> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind { +define amdgpu_kernel void @v3i32_arg(ptr addrspace(1) nocapture %out, <3 x i32> %in) nounwind { ; HSA-VI-LABEL: name: v3i32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -490,11 +490,11 @@ define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4 + store <3 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind { +define amdgpu_kernel void @v3f32_arg(ptr addrspace(1) nocapture %out, <3 x float> %in) nounwind { ; HSA-VI-LABEL: name: v3f32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -522,11 +522,11 @@ define 
amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4 + store <3 x float> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { +define amdgpu_kernel void @v4i8_arg(ptr addrspace(1) %out, <4 x i8> %in) { ; HSA-VI-LABEL: name: v4i8_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -554,11 +554,11 @@ define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) { ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <4 x i8> %in, <4 x i8> addrspace(1)* %out + store <4 x i8> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) { +define amdgpu_kernel void @v4i16_arg(ptr addrspace(1) %out, <4 x i16> %in) { ; HSA-VI-LABEL: name: v4i16_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -586,11 +586,11 @@ define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <4 x i16> %in, <4 x i16> addrspace(1)* %out + store <4 x i16> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind { +define amdgpu_kernel void @v4i32_arg(ptr addrspace(1) nocapture %out, <4 x i32> %in) nounwind { ; HSA-VI-LABEL: name: v4i32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -618,11 +618,11 @@ define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 + store <4 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind { +define amdgpu_kernel void @v4f32_arg(ptr addrspace(1) nocapture %out, <4 x float> %in) nounwind { ; HSA-VI-LABEL: name: v4f32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -650,11 +650,11 @@ define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4 + store <4 x float> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { +define amdgpu_kernel void @v8i8_arg(ptr addrspace(1) %out, <8 x i8> %in) { ; HSA-VI-LABEL: name: v8i8_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -682,11 +682,11 @@ define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) { ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <8 x i8> %in, <8 x i8> addrspace(1)* %out + store <8 x 
i8> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) { +define amdgpu_kernel void @v8i16_arg(ptr addrspace(1) %out, <8 x i16> %in) { ; HSA-VI-LABEL: name: v8i16_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -714,11 +714,11 @@ define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <8 x i16> %in, <8 x i16> addrspace(1)* %out + store <8 x i16> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind { +define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> %in) nounwind { ; HSA-VI-LABEL: name: v8i32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -746,11 +746,11 @@ define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4 + store <8 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind { +define amdgpu_kernel void @v8f32_arg(ptr addrspace(1) nocapture %out, <8 x float> %in) nounwind { ; HSA-VI-LABEL: name: v8f32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -778,11 +778,11 @@ define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4 + store <8 x float> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) { +define amdgpu_kernel void @v16i8_arg(ptr addrspace(1) %out, <16 x i8> %in) { ; HSA-VI-LABEL: name: v16i8_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -810,11 +810,11 @@ define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <16 x i8> %in, <16 x i8> addrspace(1)* %out + store <16 x i8> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) { +define amdgpu_kernel void @v16i16_arg(ptr addrspace(1) %out, <16 x i16> %in) { ; HSA-VI-LABEL: name: v16i16_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -842,11 +842,11 @@ define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <16 x i16> %in, <16 x i16> addrspace(1)* %out + store <16 x i16> %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind { +define amdgpu_kernel void @v16i32_arg(ptr addrspace(1) nocapture %out, <16 x i32> %in) nounwind { ; HSA-VI-LABEL: name: v16i32_arg ; HSA-VI: 
bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -874,11 +874,11 @@ define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, < ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4 + store <16 x i32> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind { +define amdgpu_kernel void @v16f32_arg(ptr addrspace(1) nocapture %out, <16 x float> %in) nounwind { ; HSA-VI-LABEL: name: v16f32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -906,11 +906,11 @@ define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4 + store <16 x float> %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind { +define amdgpu_kernel void @kernel_arg_i64(ptr addrspace(1) %out, i64 %a) nounwind { ; HSA-VI-LABEL: name: kernel_arg_i64 ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -937,11 +937,11 @@ define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwi ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store i64 %a, i64 addrspace(1)* %out, align 8 + store i64 %a, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in) { +define amdgpu_kernel void @f64_kernel_arg(ptr addrspace(1) %out, double %in) { ; HSA-VI-LABEL: name: f64_kernel_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -969,11 +969,11 @@ define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 entry: - store double %in, double addrspace(1)* %out + store double %in, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg(ptr addrspace(1) %out, i1 %x) nounwind { ; HSA-VI-LABEL: name: i1_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1000,11 +1000,11 @@ define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind { ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store i1 %x, i1 addrspace(1)* %out, align 1 + store i1 %x, ptr addrspace(1) %out, align 1 ret void } -define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_zext_i32(ptr addrspace(1) %out, i1 %x) nounwind { ; HSA-VI-LABEL: name: i1_arg_zext_i32 ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1034,11 +1034,11 @@ define 
amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i1 %x to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_zext_i64(ptr addrspace(1) %out, i1 %x) nounwind { ; HSA-VI-LABEL: name: i1_arg_zext_i64 ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1068,11 +1068,11 @@ define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = zext i1 %x to i64 - store i64 %ext, i64 addrspace(1)* %out, align 8 + store i64 %ext, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_sext_i32(ptr addrspace(1) %out, i1 %x) nounwind { ; HSA-VI-LABEL: name: i1_arg_sext_i32 ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1102,11 +1102,11 @@ define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = sext i1 %x to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind { +define amdgpu_kernel void @i1_arg_sext_i64(ptr addrspace(1) %out, i1 %x) nounwind { ; HSA-VI-LABEL: name: i1_arg_sext_i64 ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1136,7 +1136,7 @@ define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwi ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %ext = sext i1 %x to i64 - store i64 %ext, i64 addrspace(1)* %out, align 8 + store i64 %ext, ptr addrspace(1) %out, align 8 ret void } @@ -1152,7 +1152,7 @@ define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind { ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: empty_struct_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1163,9 +1163,9 @@ define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind { ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) 
into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store i32 %arg1, i32 addrspace(1)* undef + store i32 %arg1, ptr addrspace(1) undef ret void } @@ -1179,7 +1179,7 @@ define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind { ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: empty_array_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1190,9 +1190,9 @@ define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind { ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store i32 %arg1, i32 addrspace(1)* undef + store i32 %arg1, ptr addrspace(1) undef ret void } @@ -1226,13 +1226,11 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; HSA-VI-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD4]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: struct_argument_alignment ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1255,27 +1253,25 @@ define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, ; LEGACY-MESA-VI-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; LEGACY-MESA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; LEGACY-MESA-VI-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %val0 = extractvalue {i32, i64} %arg0, 0 %val1 = extractvalue {i32, i64} %arg0, 1 %val2 = extractvalue {i32, i64} %arg1, 0 %val3 = extractvalue {i32, i64} %arg1, 1 - store volatile i32 %val0, i32 addrspace(1)* null - store volatile i64 %val1, i64 addrspace(1)* null - store volatile i8 %pad, i8 addrspace(1)* null - store volatile i32 %val2, i32 addrspace(1)* null - store volatile i64 %val3, i64 addrspace(1)* null + store volatile i32 %val0, ptr addrspace(1) null + store volatile i64 %val1, ptr addrspace(1) null + store volatile i8 %pad, ptr addrspace(1) null + store volatile i32 %val2, ptr addrspace(1) null + store volatile i64 %val3, ptr addrspace(1) null ret void } -define amdgpu_kernel void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %arg0, i8 %pad, {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1) { +define amdgpu_kernel void @pointer_in_struct_argument({ptr addrspace(3), ptr addrspace(1)} %arg0, i8 %pad, {ptr addrspace(3), ptr addrspace(1234)} %arg1) { ; HSA-VI-LABEL: name: pointer_in_struct_argument ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1297,14 +1293,11 @@ define amdgpu_kernel void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addr ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4) ; HSA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; HSA-VI-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; HSA-VI-NEXT: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; HSA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: 
(volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](p1), [[C5]](p1) :: (volatile store (p1) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[C5]](p1) :: (volatile store (p1234) into `ptr addrspace(1) null`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: pointer_in_struct_argument ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1327,24 +1320,21 @@ define amdgpu_kernel void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addr ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; LEGACY-MESA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; LEGACY-MESA-VI-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; LEGACY-MESA-VI-NEXT: [[COPY3:%[0-9]+]]:_(p1) = COPY [[C5]](p1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](p1), [[COPY1]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[COPY2]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[COPY3]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](p1), [[C5]](p1) :: (volatile store (p1) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[C5]](p1) :: (volatile store (p1234) into `ptr addrspace(1) null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0 - %val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1 - %val2 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 0 - %val3 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 1 - store volatile i8 addrspace(3)* %val0, i8 addrspace(3)* addrspace(1)* null - store volatile i8 addrspace(1)* 
%val1, i8 addrspace(1)* addrspace(1)* null - store volatile i8 %pad, i8 addrspace(1)* null - store volatile i8 addrspace(3)* %val2, i8 addrspace(3)* addrspace(1)* null - store volatile i8 addrspace(1234)* %val3, i8 addrspace(1234)* addrspace(1)* null + %val0 = extractvalue {ptr addrspace(3), ptr addrspace(1)} %arg0, 0 + %val1 = extractvalue {ptr addrspace(3), ptr addrspace(1)} %arg0, 1 + %val2 = extractvalue {ptr addrspace(3), ptr addrspace(1234)} %arg1, 0 + %val3 = extractvalue {ptr addrspace(3), ptr addrspace(1234)} %arg1, 1 + store volatile ptr addrspace(3) %val0, ptr addrspace(1) null + store volatile ptr addrspace(1) %val1, ptr addrspace(1) null + store volatile i8 %pad, ptr addrspace(1) null + store volatile ptr addrspace(3) %val2, ptr addrspace(1) null + store volatile ptr addrspace(1234) %val3, ptr addrspace(1) null ret void } @@ -1369,11 +1359,10 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4) ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; HSA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1) - ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD3]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment ; LEGACY-MESA-VI: bb.1 (%ir-block.1): @@ -1393,24 +1382,23 @@ define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; LEGACY-MESA-VI-NEXT: [[COPY1:%[0-9]+]]:_(p1) = COPY [[C4]](p1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `i32 addrspace(1)* null`, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s64), [[COPY1]](p1) :: (volatile store (s64) into `i64 addrspace(1)* null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C4]](p1) :: (volatile store (s64) 
into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 %val0 = extractvalue <{i32, i64}> %arg0, 0 %val1 = extractvalue <{i32, i64}> %arg0, 1 %val2 = extractvalue <{i32, i64}> %arg1, 0 %val3 = extractvalue <{i32, i64}> %arg1, 1 - store volatile i32 %val0, i32 addrspace(1)* null - store volatile i64 %val1, i64 addrspace(1)* null - store volatile i32 %val2, i32 addrspace(1)* null - store volatile i64 %val3, i64 addrspace(1)* null + store volatile i32 %val0, ptr addrspace(1) null + store volatile i64 %val1, ptr addrspace(1) null + store volatile i32 %val2, ptr addrspace(1) null + store volatile i64 %val3, ptr addrspace(1) null ret void } -define amdgpu_kernel void @unused_i32_arg(i32 addrspace(1)* nocapture %out, i32 %unused, i32 %in) nounwind { +define amdgpu_kernel void @unused_i32_arg(ptr addrspace(1) nocapture %out, i32 %unused, i32 %in) nounwind { ; HSA-VI-LABEL: name: unused_i32_arg ; HSA-VI: bb.1.entry: ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1428,7 +1416,7 @@ entry: } ; Byref pointers should only be treated as offsets from kernarg -define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %out, i8 addrspace(4)* byref(i8) %in.byref) { +define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i8) %in.byref) { ; HSA-VI-LABEL: name: byref_constant_i8_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1457,13 +1445,13 @@ define amdgpu_kernel void @byref_constant_i8_arg(i32 addrspace(1)* nocapture %ou ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i8, i8 addrspace(4)* %in.byref + %in = load i8, ptr addrspace(4) %in.byref %ext = zext i8 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %out, i16 addrspace(4)* byref(i16) align 2 %in.byref) { +define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i16) align 2 %in.byref) { ; HSA-VI-LABEL: name: byref_constant_i16_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1492,13 +1480,13 @@ define amdgpu_kernel void @byref_constant_i16_arg(i32 addrspace(1)* nocapture %o ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i16, i16 addrspace(4)* %in.byref + %in = load i16, ptr addrspace(4) %in.byref %ext = zext i16 %in to i32 - store i32 %ext, i32 addrspace(1)* %out, align 4 + store i32 %ext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align 4 %in.byref, i32 %after.offset) { +define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align 4 %in.byref, i32 %after.offset) { ; HSA-VI-LABEL: name: byref_constant_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: 
$sgpr4_sgpr5 @@ -1533,13 +1521,13 @@ define amdgpu_kernel void @byref_constant_i32_arg(i32 addrspace(1)* nocapture %o ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32 addrspace(4)* %in.byref - store volatile i32 %in, i32 addrspace(1)* %out, align 4 - store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 + %in = load i32, ptr addrspace(4) %in.byref + store volatile i32 %in, ptr addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> addrspace(4)* byref(<4 x i32>) align(16) %in.byref, i32 %after.offset) { +define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(<4 x i32>) align(16) %in.byref, i32 %after.offset) { ; HSA-VI-LABEL: name: byref_constant_v4i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1555,7 +1543,7 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4) ; HSA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.cast, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_constant_v4i32_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1572,16 +1560,15 @@ define amdgpu_kernel void @byref_constant_v4i32_arg(<4 x i32> addrspace(1)* noca ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<4 x s32>) from %ir.in.byref, addrspace 4) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.cast, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load <4 x i32>, <4 x i32> addrspace(4)* %in.byref - store volatile <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4 - %out.cast = bitcast <4 x i32> addrspace(1)* %out to i32 addrspace(1)* - store volatile i32 %after.offset, i32 addrspace(1)* %out.cast, align 4 + %in = load <4 x i32>, ptr addrspace(4) %in.byref + store volatile <4 x i32> %in, ptr addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(256) %in.byref, i32 %after.offset) { +define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) 
align(256) %in.byref, i32 %after.offset) { ; HSA-VI-LABEL: name: byref_align_constant_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1616,13 +1603,13 @@ define amdgpu_kernel void @byref_align_constant_i32_arg(i32 addrspace(1)* nocapt ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32 addrspace(4)* %in.byref - store volatile i32 %in, i32 addrspace(1)* %out, align 4 - store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 + %in = load i32, ptr addrspace(4) %in.byref + store volatile i32 %in, ptr addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace(1)* nocapture %out, i8, <16 x i32> addrspace(4)* byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) { +define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) { ; HSA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg ; HSA-VI: bb.1 (%ir-block.1): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1637,7 +1624,7 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg @@ -1654,18 +1641,17 @@ define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(i32 addrspace ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable load (<16 x s32>) from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.cast.out, align 4, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load <16 x i32>, <16 x i32> addrspace(4)* %in.byref - %cast.out = bitcast i32 addrspace(1)* %out to <16 x i32> addrspace(1)* - store volatile <16 x i32> %in, <16 x i32> addrspace(1)* %cast.out, align 4 - store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 + %in = load <16 x i32>, ptr addrspace(4) %in.byref + store volatile <16 x i32> %in, ptr 
addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } ; Also accept byref kernel arguments with other global address spaces. -define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* byref(i32) align(4) %in.byref) { +define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(1) byref(i32) align(4) %in.byref) { ; HSA-VI-LABEL: name: byref_global_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1694,12 +1680,12 @@ define amdgpu_kernel void @byref_global_i32_arg(i32 addrspace(1)* nocapture %out ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.in.byref, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32 addrspace(1)* %in.byref - store i32 %in, i32 addrspace(1)* %out, align 4 + %in = load i32, ptr addrspace(1) %in.byref + store i32 %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, i32* byref(i32) align(4) %in.byref) { +define amdgpu_kernel void @byref_flat_i32_arg(ptr addrspace(1) nocapture %out, ptr byref(i32) align(4) %in.byref) { ; HSA-VI-LABEL: name: byref_flat_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1728,12 +1714,12 @@ define amdgpu_kernel void @byref_flat_i32_arg(i32 addrspace(1)* nocapture %out, ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32* %in.byref - store i32 %in, i32 addrspace(1)* %out, align 4 + %in = load i32, ptr %in.byref + store i32 %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(6)* byref(i32) align(4) %in.byref) { +define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(6) byref(i32) align(4) %in.byref) { ; HSA-VI-LABEL: name: byref_constant_32bit_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1762,12 +1748,12 @@ define amdgpu_kernel void @byref_constant_32bit_i32_arg(i32 addrspace(1)* nocapt ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 6) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32 addrspace(6)* %in.byref - store i32 %in, i32 addrspace(1)* %out, align 4 + %in = load i32, ptr addrspace(6) %in.byref + store i32 %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(999)* byref(i32) align(4) %in.byref) { +define amdgpu_kernel void @byref_unknown_as_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(999) byref(i32) align(4) %in.byref) { ; HSA-VI-LABEL: name: byref_unknown_as_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1796,13 +1782,13 @@ define amdgpu_kernel void @byref_unknown_as_i32_arg(i32 addrspace(1)* nocapture ; LEGACY-MESA-VI-NEXT: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32 addrspace(999)* %in.byref - store i32 %in, i32 addrspace(1)* %out, align 4 + %in = load i32, ptr addrspace(999) %in.byref + store i32 %in, ptr addrspace(1) %out, align 4 ret void } ; Invalid, but should not crash. -define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(3)* byref(i32) align(4) %in.byref) { +define amdgpu_kernel void @byref_local_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(3) byref(i32) align(4) %in.byref) { ; HSA-VI-LABEL: name: byref_local_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1831,12 +1817,12 @@ define amdgpu_kernel void @byref_local_i32_arg(i32 addrspace(1)* nocapture %out, ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32 addrspace(3)* %in.byref - store i32 %in, i32 addrspace(1)* %out, align 4 + %in = load i32, ptr addrspace(3) %in.byref + store i32 %in, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* byref(i32) align(4) %in0.byref, i32 addrspace(4)* byref(i32) align(4) %in1.byref, i32 %after.offset) { +define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align(4) %in0.byref, ptr addrspace(4) byref(i32) align(4) %in1.byref, i32 %after.offset) { ; HSA-VI-LABEL: name: multi_byref_constant_i32_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1879,15 +1865,15 @@ define amdgpu_kernel void @multi_byref_constant_i32_arg(i32 addrspace(1)* nocapt ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in0 = load i32, i32 addrspace(4)* %in0.byref - %in1 = load i32, i32 addrspace(4)* %in1.byref - store volatile i32 %in0, i32 addrspace(1)* %out, align 4 - store volatile i32 %in1, i32 addrspace(1)* %out, align 4 - store volatile i32 %after.offset, i32 addrspace(1)* %out, align 4 + %in0 = load i32, ptr addrspace(4) %in0.byref + %in1 = load i32, ptr addrspace(4) %in1.byref + store volatile i32 %in0, ptr addrspace(1) %out, align 4 + store volatile i32 %in1, ptr addrspace(1) %out, align 4 + store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byref(i32) align(4) %in.byref) { +define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref(i32) align(4) %in.byref) { ; HSA-VI-LABEL: name: byref_constant_i32_arg_offset0 ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1897,7 +1883,7 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD 
[[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) - ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg_offset0 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1908,14 +1894,14 @@ define amdgpu_kernel void @byref_constant_i32_arg_offset0(i32 addrspace(4)* byre ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 4) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - %in = load i32, i32 addrspace(4)* %in.byref - store i32 %in, i32 addrspace(1)* undef, align 4 + %in = load i32, ptr addrspace(4) %in.byref + store i32 %in, ptr addrspace(1) undef, align 4 ret void } -define amdgpu_kernel void @p3i8_arg(i8 addrspace(3)* %arg) nounwind { +define amdgpu_kernel void @p3i8_arg(ptr addrspace(3) %arg) nounwind { ; HSA-VI-LABEL: name: p3i8_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1938,11 +1924,11 @@ define amdgpu_kernel void @p3i8_arg(i8 addrspace(3)* %arg) nounwind { ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 ; LEGACY-MESA-VI-NEXT: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store i8 9, i8 addrspace(3)* %arg, align 4 + store i8 9, ptr addrspace(3) %arg, align 4 ret void } -define amdgpu_kernel void @p1i8_arg(i8 addrspace(1)* %arg) nounwind { +define amdgpu_kernel void @p1i8_arg(ptr addrspace(1) %arg) nounwind { ; HSA-VI-LABEL: name: p1i8_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1950,7 +1936,7 @@ define amdgpu_kernel void @p1i8_arg(i8 addrspace(1)* %arg) nounwind { ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0 - ; HSA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3) + ; HSA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `ptr addrspace(3) null`, addrspace 3) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: p1i8_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1959,13 +1945,13 @@ define amdgpu_kernel void @p1i8_arg(i8 addrspace(1)* %arg) nounwind { ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr0_sgpr1 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0 - ; LEGACY-MESA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `i8 addrspace(3)* null`, addrspace 3) + ; LEGACY-MESA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `ptr addrspace(3) null`, addrspace 3) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store i8 9, i8 addrspace(3)* null + store i8 9, ptr addrspace(3) null ret void } -define amdgpu_kernel void @v2p1i8_arg(<2 x i8 addrspace(1)*> %arg) nounwind { +define amdgpu_kernel void @v2p1i8_arg(<2 x ptr addrspace(1)> %arg) nounwind { ; HSA-VI-LABEL: name: 
v2p1i8_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -1975,7 +1961,7 @@ define amdgpu_kernel void @v2p1i8_arg(<2 x i8 addrspace(1)*> %arg) nounwind { ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v2p1i8_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -1986,13 +1972,13 @@ define amdgpu_kernel void @v2p1i8_arg(<2 x i8 addrspace(1)*> %arg) nounwind { ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store <2 x i8 addrspace(1)*> %arg, <2 x i8 addrspace(1)*> addrspace(1)* undef + store <2 x ptr addrspace(1)> %arg, ptr addrspace(1) undef ret void } -define amdgpu_kernel void @v2p3i8_arg(<2 x i8 addrspace(3)*> %arg) nounwind { +define amdgpu_kernel void @v2p3i8_arg(<2 x ptr addrspace(3)> %arg) nounwind { ; HSA-VI-LABEL: name: v2p3i8_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -2002,7 +1988,7 @@ define amdgpu_kernel void @v2p3i8_arg(<2 x i8 addrspace(3)*> %arg) nounwind { ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v2p3i8_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -2013,13 +1999,13 @@ define amdgpu_kernel void @v2p3i8_arg(<2 x i8 addrspace(3)*> %arg) nounwind { ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store <2 x i8 addrspace(3)*> %arg, <2 x i8 addrspace(3)*> addrspace(1)* undef + store <2 x ptr addrspace(3)> %arg, ptr addrspace(1) undef ret void } -define amdgpu_kernel void 
@v2p1i8_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg) nounwind { +define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x ptr addrspace(1)>, <2 x ptr addrspace(3)> } %arg) nounwind { ; HSA-VI-LABEL: name: v2p1i8_in_struct_arg ; HSA-VI: bb.1 (%ir-block.0): ; HSA-VI-NEXT: liveins: $sgpr4_sgpr5 @@ -2032,10 +2018,10 @@ define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 16, addrspace 4) ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1) + ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef` + 16, align 16, addrspace 1) ; HSA-VI-NEXT: S_ENDPGM 0 ; LEGACY-MESA-VI-LABEL: name: v2p1i8_in_struct_arg ; LEGACY-MESA-VI: bb.1 (%ir-block.0): @@ -2049,11 +2035,11 @@ define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4) ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1) + ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef` + 16, align 16, addrspace 1) ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 - store { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg, { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef + store { <2 x ptr addrspace(1)>, <2 x ptr addrspace(3)> } %arg, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll index 7445bb3b2a3b2..5169c9b634d3e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll @@ -27,7 +27,7 @@ define amdgpu_vs void @test_f32(float %arg0) { ret void } -define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) { +define amdgpu_vs void @test_ptr2_inreg(ptr 
addrspace(4) inreg %arg0) { ; CHECK-LABEL: name: test_ptr2_inreg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3 @@ -37,11 +37,11 @@ define amdgpu_vs void @test_ptr2_inreg(i32 addrspace(4)* inreg %arg0) { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p4) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile invariant load (s32) from %ir.arg0, addrspace 4) ; CHECK-NEXT: S_ENDPGM 0 - %tmp0 = load volatile i32, i32 addrspace(4)* %arg0 + %tmp0 = load volatile i32, ptr addrspace(4) %arg0 ret void } -define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* inreg %arg1) { +define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, ptr addrspace(4) inreg %arg1) { ; CHECK-LABEL: name: test_sgpr_alignment0 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4 @@ -54,7 +54,7 @@ define amdgpu_vs void @test_sgpr_alignment0(float inreg %arg0, i32 addrspace(4)* ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[MV]](p4) :: (volatile invariant load (s32) from %ir.arg1, addrspace 4) ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 32, 15, [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), 0, 0 ; CHECK-NEXT: S_ENDPGM 0 - %tmp0 = load volatile i32, i32 addrspace(4)* %arg1 + %tmp0 = load volatile i32, ptr addrspace(4) %arg1 call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %arg0, float undef, float undef, float undef, i1 false, i1 false) #0 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll index aa19fc8b26fca..55318eae4d72f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs -o - %s | FileCheck %s ; TODO: Could potentially insert it here -define void @arg_align_8(i8 addrspace(1)* align 8 %arg0) { +define void @arg_align_8(ptr addrspace(1) align 8 %arg0) { ; CHECK-LABEL: name: arg_align_8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -13,12 +13,12 @@ define void @arg_align_8(i8 addrspace(1)* align 8 %arg0) { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0 ; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.arg0, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i8 0, i8 addrspace(1)* %arg0, align 8 + store i8 0, ptr addrspace(1) %arg0, align 8 ret void } -declare i8 addrspace(1)* @returns_ptr() -declare align 8 i8 addrspace(1)* @returns_ptr_align8() +declare ptr addrspace(1) @returns_ptr() +declare align 8 ptr addrspace(1) @returns_ptr_align8() define void @call_result_align_1() { ; CHECK-LABEL: name: call_result_align_1 @@ -64,8 +64,8 @@ define void @call_result_align_1() { ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.ptr, addrspace 1) ; CHECK-NEXT: SI_RETURN - %ptr = call align 1 i8 addrspace(1)* @returns_ptr() - store i8 0, i8 addrspace(1)* %ptr, align 1 + %ptr = call align 1 ptr addrspace(1) @returns_ptr() + store i8 0, ptr addrspace(1) %ptr, align 1 ret void } @@ -114,8 +114,8 @@ define void @call_result_align_8() { ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - %ptr = call 
align 8 i8 addrspace(1)* @returns_ptr() - store i8 0, i8 addrspace(1)* %ptr, align 8 + %ptr = call align 8 ptr addrspace(1) @returns_ptr() + store i8 0, ptr addrspace(1) %ptr, align 8 ret void } @@ -164,12 +164,12 @@ define void @declaration_result_align_8() { ; CHECK-NEXT: [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8 ; CHECK-NEXT: G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - %ptr = call i8 addrspace(1)* @returns_ptr_align8() - store i8 0, i8 addrspace(1)* %ptr, align 8 + %ptr = call ptr addrspace(1) @returns_ptr_align8() + store i8 0, ptr addrspace(1) %ptr, align 8 ret void } -define i8 addrspace(1)* @tail_call_assert_align() { +define ptr addrspace(1) @tail_call_assert_align() { ; CHECK-LABEL: name: tail_call_assert_align ; CHECK: bb.1.entry: ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -206,6 +206,6 @@ define i8 addrspace(1)* @tail_call_assert_align() { ; CHECK-NEXT: $vgpr31 = COPY [[COPY17]](s32) ; CHECK-NEXT: SI_TCRETURN [[GV]](p0), @returns_ptr_align8, 0, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: - %call = tail call i8 addrspace(1)* @returns_ptr_align8() - ret i8 addrspace(1)* %call + %call = tail call ptr addrspace(1) @returns_ptr_align8() + ret ptr addrspace(1) %call } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll index 5206284cf5723..e5c11381865f5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-atomicrmw.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -o - %s | FileCheck %s -define float @test_atomicrmw_fadd(float addrspace(3)* %addr) { +define float @test_atomicrmw_fadd(ptr addrspace(3) %addr) { ; CHECK-LABEL: name: test_atomicrmw_fadd ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 @@ -11,11 +11,11 @@ define float @test_atomicrmw_fadd(float addrspace(3)* %addr) { ; CHECK-NEXT: [[ATOMICRMW_FADD:%[0-9]+]]:_(s32) = G_ATOMICRMW_FADD [[COPY]](p3), [[C]] :: (load store seq_cst (s32) on %ir.addr, addrspace 3) ; CHECK-NEXT: $vgpr0 = COPY [[ATOMICRMW_FADD]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %oldval = atomicrmw fadd float addrspace(3)* %addr, float 1.0 seq_cst + %oldval = atomicrmw fadd ptr addrspace(3) %addr, float 1.0 seq_cst ret float %oldval } -define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { +define float @test_atomicrmw_fsub(ptr addrspace(3) %addr) { ; CHECK-LABEL: name: test_atomicrmw_fsub ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: successors: %bb.2(0x80000000) @@ -33,7 +33,7 @@ define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(s64) = G_PHI %8(s64), %bb.2, [[C1]](s64), %bb.1 ; CHECK-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[LOAD]](s32), %bb.1, %6(s32), %bb.2 ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s32) = G_FSUB [[PHI1]], [[C]] - ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.2, addrspace 
3) + ; CHECK-NEXT: [[ATOMIC_CMPXCHG_WITH_SUCCESS:%[0-9]+]]:_(s32), [[ATOMIC_CMPXCHG_WITH_SUCCESS1:%[0-9]+]]:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS [[COPY]](p3), [[PHI1]], [[FSUB]] :: (load store seq_cst seq_cst (s32) on %ir.addr, addrspace 3) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s64) = G_INTRINSIC intrinsic(@llvm.amdgcn.if.break), [[ATOMIC_CMPXCHG_WITH_SUCCESS1]](s1), [[PHI]](s64) ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.loop), [[INT]](s64) ; CHECK-NEXT: G_BRCOND [[INT1]](s1), %bb.3 @@ -45,6 +45,6 @@ define float @test_atomicrmw_fsub(float addrspace(3)* %addr) { ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI3]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[PHI2]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %oldval = atomicrmw fsub float addrspace(3)* %addr, float 1.0 seq_cst + %oldval = atomicrmw fsub ptr addrspace(3) %addr, float 1.0 seq_cst ret float %oldval } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll index 4103b8055e388..a9516450834a9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-non-fixed.ll @@ -64,7 +64,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -80,8 +80,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN - %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef - %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val = load { i8, i32 }, ptr addrspace(1) %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val) ret void } @@ -90,7 +90,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -106,8 +106,8 @@ define amdgpu_gfx void 
@test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN - %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef - %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val = load { i8, i32 }, ptr addrspace(1) %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index 4ded7eba7ab15..572b2e5a8b245 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -26,11 +26,11 @@ declare half @external_f16_func_void() #0 declare float @external_f32_func_void() #0 declare double @external_f64_func_void() #0 -declare i8 addrspace(1)* @external_p1_func_void() #0 -declare <2 x i8 addrspace(1)*> @external_v2p1_func_void() #0 +declare ptr addrspace(1) @external_p1_func_void() #0 +declare <2 x ptr addrspace(1)> @external_v2p1_func_void() #0 -declare i8 addrspace(3)* @external_p3_func_void() #0 -declare <2 x i8 addrspace(3)*> @external_v2p3_func_void() #0 +declare ptr addrspace(3) @external_p3_func_void() #0 +declare <2 x ptr addrspace(3)> @external_v2p3_func_void() #0 declare <2 x half> @external_v2f16_func_void() #0 declare <3 x half> @external_v3f16_func_void() #0 @@ -47,7 +47,7 @@ declare <8 x i32> @external_v8i32_func_void() #0 declare <16 x i32> @external_v16i32_func_void() #0 declare <32 x i32> @external_v32i32_func_void() #0 declare <33 x i32> @external_v33i32_func_void() #0 -declare <33 x i32> @external_v33i32_func_v33i32_i32(<33 x i32> addrspace(1)*, i32) #0 +declare <33 x i32> @external_v33i32_func_v33i32_i32(ptr addrspace(1), i32) #0 declare { <32 x i32>, i32 } @external_v32i32_i32_func_void() #0 declare { i32, <32 x i32> } @external_i32_v32i32_func_void() #0 @@ -67,7 +67,7 @@ declare { i32, i64 } @external_gfx_i32_i64_func_void() #0 declare hidden i32 @external_gfx_i32_func_i32(i32) #0 -define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 { ; GCN-LABEL: name: test_call_external_i32_func_i32_imm ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -84,7 +84,7 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42 ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset.cast, align 16, addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.out.kernarg.offset1, align 16, addrspace 4) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_func_i32 ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY 
[[COPY8]] @@ -124,11 +124,11 @@ define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* ; GCN-NEXT: G_STORE [[COPY21]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out.load, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i32 @external_i32_func_i32(i32 42) - store volatile i32 %val, i32 addrspace(1)* %out + store volatile i32 %val, ptr addrspace(1) %out ret void } -define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 { +define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(ptr addrspace(1) %out) #0 { ; GCN-LABEL: name: test_gfx_call_external_i32_func_i32_imm ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0, $vgpr1 @@ -148,7 +148,7 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_i32_imm(i32 addrspace(1) ; GCN-NEXT: G_STORE [[COPY3]](s32), [[MV]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i32 @external_gfx_i32_func_i32(i32 42) - store volatile i32 %val, i32 addrspace(1)* %out + store volatile i32 %val, ptr addrspace(1) %out ret void } @@ -204,10 +204,10 @@ define amdgpu_kernel void @test_call_external_i1_func_void() #0 { ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i1 @external_i1_func_void() - store volatile i1 %val, i1 addrspace(1)* undef + store volatile i1 %val, ptr addrspace(1) undef ret void } @@ -223,10 +223,10 @@ define amdgpu_gfx void @test_gfx_call_external_i1_func_void() #0 { ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i1 @external_gfx_i1_func_void() - store volatile i1 %val, i1 addrspace(1)* undef + store volatile i1 %val, ptr addrspace(1) undef ret void } @@ -284,11 +284,11 @@ define amdgpu_kernel void @test_call_external_i1_zeroext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_ZEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) - ; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i1 @external_i1_zeroext_func_void() %val.ext = zext i1 %val to i32 - store volatile i32 %val.ext, i32 addrspace(1)* undef + store volatile i32 %val.ext, ptr addrspace(1) undef ret void } @@ -346,11 +346,11 @@ define amdgpu_kernel void @test_call_external_i1_signext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) - ; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, 
addrspace 1) + ; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i1 @external_i1_signext_func_void() %val.ext = sext i1 %val to i32 - store volatile i32 %val.ext, i32 addrspace(1)* undef + store volatile i32 %val.ext, ptr addrspace(1) undef ret void } @@ -407,10 +407,10 @@ define amdgpu_kernel void @test_call_external_i8_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i8 @external_i8_func_void() - store volatile i8 %val, i8 addrspace(1)* undef + store volatile i8 %val, ptr addrspace(1) undef ret void } @@ -427,10 +427,10 @@ define amdgpu_gfx void @test_gfx_call_external_i8_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GCN-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i8 @external_gfx_i8_func_void() - store volatile i8 %val, i8 addrspace(1)* undef + store volatile i8 %val, ptr addrspace(1) undef ret void } @@ -488,11 +488,11 @@ define amdgpu_kernel void @test_call_external_i8_zeroext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_ZEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) - ; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i8 @external_i8_zeroext_func_void() %val.ext = zext i8 %val to i32 - store volatile i32 %val.ext, i32 addrspace(1)* undef + store volatile i32 %val.ext, ptr addrspace(1) undef ret void } @@ -550,11 +550,11 @@ define amdgpu_kernel void @test_call_external_i8_signext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) - ; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i8 @external_i8_signext_func_void() %val.ext = sext i8 %val to i32 - store volatile i32 %val.ext, i32 addrspace(1)* undef + store volatile i32 %val.ext, ptr addrspace(1) undef ret void } @@ -610,10 +610,10 @@ define amdgpu_kernel void @test_call_external_i16_func_void() #0 { ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, 
addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i16 @external_i16_func_void() - store volatile i16 %val, i16 addrspace(1)* undef + store volatile i16 %val, ptr addrspace(1) undef ret void } @@ -671,11 +671,11 @@ define amdgpu_kernel void @test_call_external_i16_zeroext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_ZEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) - ; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[ZEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i16 @external_i16_zeroext_func_void() %val.ext = zext i16 %val to i32 - store volatile i32 %val.ext, i32 addrspace(1)* undef + store volatile i32 %val.ext, ptr addrspace(1) undef ret void } @@ -733,11 +733,11 @@ define amdgpu_kernel void @test_call_external_i16_signext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[ASSERT_SEXT]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) - ; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i16 @external_i16_signext_func_void() %val.ext = sext i16 %val to i32 - store volatile i32 %val.ext, i32 addrspace(1)* undef + store volatile i32 %val.ext, ptr addrspace(1) undef ret void } @@ -792,10 +792,10 @@ define amdgpu_kernel void @test_call_external_i32_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i32 @external_i32_func_void() - store volatile i32 %val, i32 addrspace(1)* undef + store volatile i32 %val, ptr addrspace(1) undef ret void } @@ -810,10 +810,10 @@ define amdgpu_gfx void @test_gfx_call_external_i32_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0 ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx i32 @external_gfx_i32_func_void() - store volatile i32 %val, i32 addrspace(1)* undef + store volatile i32 %val, ptr addrspace(1) undef ret void } @@ -871,10 +871,10 @@ define amdgpu_kernel void 
@test_call_external_i48_func_void() #0 { ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (volatile store (s48) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i48 @external_i48_func_void() - store volatile i48 %val, i48 addrspace(1)* undef + store volatile i48 %val, ptr addrspace(1) undef ret void } @@ -933,11 +933,11 @@ define amdgpu_kernel void @test_call_external_i48_zeroext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) - ; GCN-NEXT: G_STORE [[ZEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[ZEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i48 @external_i48_zeroext_func_void() %ext = zext i48 %val to i64 - store volatile i64 %ext, i64 addrspace(1)* undef + store volatile i64 %ext, ptr addrspace(1) undef ret void } @@ -996,11 +996,11 @@ define amdgpu_kernel void @test_call_external_i48_signext_func_void() #0 { ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48) - ; GCN-NEXT: G_STORE [[SEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[SEXT]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i48 @external_i48_signext_func_void() %ext = sext i48 %val to i64 - store volatile i64 %ext, i64 addrspace(1)* undef + store volatile i64 %ext, ptr addrspace(1) undef ret void } @@ -1057,10 +1057,10 @@ define amdgpu_kernel void @test_call_external_i64_func_void() #0 { ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call i64 @external_i64_func_void() - store volatile i64 %val, i64 addrspace(1)* undef + store volatile i64 %val, ptr addrspace(1) undef ret void } @@ -1117,10 +1117,10 @@ define amdgpu_kernel void @test_call_external_p1_func_void() #0 { ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (volatile store (p1) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 - %val = call i8 addrspace(1)* @external_p1_func_void() - store volatile i8 addrspace(1)* %val, i8 addrspace(1)* addrspace(1)* undef + %val = call ptr addrspace(1) @external_p1_func_void() + store volatile ptr 
addrspace(1) %val, ptr addrspace(1) undef ret void } @@ -1181,10 +1181,10 @@ define amdgpu_kernel void @test_call_external_v2p1_func_void() #0 { ; GCN-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (volatile store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 - %val = call <2 x i8 addrspace(1)*> @external_v2p1_func_void() - store volatile <2 x i8 addrspace(1)*> %val, <2 x i8 addrspace(1)*> addrspace(1)* undef + %val = call <2 x ptr addrspace(1)> @external_v2p1_func_void() + store volatile <2 x ptr addrspace(1)> %val, ptr addrspace(1) undef ret void } @@ -1239,10 +1239,10 @@ define amdgpu_kernel void @test_call_external_p3_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_p3_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](p3), [[DEF]](p3) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(3)* undef`, addrspace 3) + ; GCN-NEXT: G_STORE [[COPY21]](p3), [[DEF]](p3) :: (volatile store (p3) into `ptr addrspace(3) undef`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 - %val = call i8 addrspace(3)* @external_p3_func_void() - store volatile i8 addrspace(3)* %val, i8 addrspace(3)* addrspace(3)* undef + %val = call ptr addrspace(3) @external_p3_func_void() + store volatile ptr addrspace(3) %val, ptr addrspace(3) undef ret void } @@ -1299,10 +1299,10 @@ define amdgpu_kernel void @test_call_external_v2p3_func_void() #0 { ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(p3) = COPY $vgpr1 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY21]](p3), [[COPY22]](p3) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p3) :: (volatile store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(3)* undef`, addrspace 3) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p3) :: (volatile store (<2 x p3>) into `ptr addrspace(3) undef`, addrspace 3) ; GCN-NEXT: S_ENDPGM 0 - %val = call <2 x i8 addrspace(3)*> @external_v2p3_func_void() - store volatile <2 x i8 addrspace(3)*> %val, <2 x i8 addrspace(3)*> addrspace(3)* undef + %val = call <2 x ptr addrspace(3)> @external_v2p3_func_void() + store volatile <2 x ptr addrspace(3)> %val, ptr addrspace(3) undef ret void } @@ -1358,10 +1358,10 @@ define amdgpu_kernel void @test_call_external_f16_func_void() #0 { ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY21]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call half @external_f16_func_void() - store volatile half %val, half 
addrspace(1)* undef + store volatile half %val, ptr addrspace(1) undef ret void } @@ -1416,10 +1416,10 @@ define amdgpu_kernel void @test_call_external_f32_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_f32_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `float addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call float @external_f32_func_void() - store volatile float %val, float addrspace(1)* undef + store volatile float %val, ptr addrspace(1) undef ret void } @@ -1476,10 +1476,10 @@ define amdgpu_kernel void @test_call_external_f64_func_void() #0 { ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY21]](s32), [[COPY22]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `double addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call double @external_f64_func_void() - store volatile double %val, double addrspace(1)* undef + store volatile double %val, ptr addrspace(1) undef ret void } @@ -1540,10 +1540,10 @@ define amdgpu_kernel void @test_call_external_v2f64_func_void() #0 { ; GCN-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x double> @external_v2f64_func_void() - store volatile <2 x double> %val, <2 x double> addrspace(1)* undef + store volatile <2 x double> %val, ptr addrspace(1) undef ret void } @@ -1600,10 +1600,10 @@ define amdgpu_kernel void @test_call_external_v2i32_func_void() #0 { ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x i32> @external_v2i32_func_void() - store volatile <2 x i32> %val, <2 x i32> addrspace(1)* undef + store volatile <2 x i32> %val, ptr addrspace(1) undef ret void } @@ -1661,10 +1661,10 @@ define amdgpu_kernel void @test_call_external_v3i32_func_void() #0 { ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = 
G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x i32> @external_v3i32_func_void() - store volatile <3 x i32> %val, <3 x i32> addrspace(1)* undef, align 8 + store volatile <3 x i32> %val, ptr addrspace(1) undef, align 8 ret void } @@ -1723,10 +1723,10 @@ define amdgpu_kernel void @test_call_external_v4i32_func_void() #0 { ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <4 x i32> @external_v4i32_func_void() - store volatile <4 x i32> %val, <4 x i32> addrspace(1)* undef, align 8 + store volatile <4 x i32> %val, ptr addrspace(1) undef, align 8 ret void } @@ -1786,10 +1786,10 @@ define amdgpu_kernel void @test_call_external_v5i32_func_void() #0 { ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <5 x i32> @external_v5i32_func_void() - store volatile <5 x i32> %val, <5 x i32> addrspace(1)* undef, align 8 + store volatile <5 x i32> %val, ptr addrspace(1) undef, align 8 ret void } @@ -1852,10 +1852,10 @@ define amdgpu_kernel void @test_call_external_v8i32_func_void() #0 { ; GCN-NEXT: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr7 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <8 x i32> @external_v8i32_func_void() - store volatile <8 x i32> %val, <8 x i32> addrspace(1)* undef, align 8 + store volatile <8 x i32> %val, ptr addrspace(1) undef, align 8 ret void } @@ -1926,10 +1926,10 @@ define amdgpu_kernel void @test_call_external_v16i32_func_void() #0 { ; GCN-NEXT: [[COPY36:%[0-9]+]]:_(s32) = COPY $vgpr15 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), 
[[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <16 x i32> @external_v16i32_func_void() - store volatile <16 x i32> %val, <16 x i32> addrspace(1)* undef, align 8 + store volatile <16 x i32> %val, ptr addrspace(1) undef, align 8 ret void } @@ -2016,10 +2016,10 @@ define amdgpu_kernel void @test_call_external_v32i32_func_void() #0 { ; GCN-NEXT: [[COPY52:%[0-9]+]]:_(s32) = COPY $vgpr31 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32), [[COPY32]](s32), [[COPY33]](s32), [[COPY34]](s32), [[COPY35]](s32), [[COPY36]](s32), [[COPY37]](s32), [[COPY38]](s32), [[COPY39]](s32), [[COPY40]](s32), [[COPY41]](s32), [[COPY42]](s32), [[COPY43]](s32), [[COPY44]](s32), [[COPY45]](s32), [[COPY46]](s32), [[COPY47]](s32), [[COPY48]](s32), [[COPY49]](s32), [[COPY50]](s32), [[COPY51]](s32), [[COPY52]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <32 x i32> @external_v32i32_func_void() - store volatile <32 x i32> %val, <32 x i32> addrspace(1)* undef, align 8 + store volatile <32 x i32> %val, ptr addrspace(1) undef, align 8 ret void } @@ -2074,10 +2074,10 @@ define amdgpu_kernel void @test_call_external_v2i16_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2i16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x i16> @external_v2i16_func_void() - store volatile <2 x i16> %val, <2 x i16> addrspace(1)* undef + store volatile <2 x i16> %val, ptr addrspace(1) undef ret void } @@ -2136,10 +2136,10 @@ define amdgpu_kernel void @test_call_external_v3i16_func_void() #0 { ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, 
implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x i16> @external_v3i16_func_void() - store volatile <3 x i16> %val, <3 x i16> addrspace(1)* undef + store volatile <3 x i16> %val, ptr addrspace(1) undef ret void } @@ -2196,10 +2196,10 @@ define amdgpu_kernel void @test_call_external_v4i16_func_void() #0 { ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <4 x i16> @external_v4i16_func_void() - store volatile <4 x i16> %val, <4 x i16> addrspace(1)* undef + store volatile <4 x i16> %val, ptr addrspace(1) undef ret void } @@ -2254,10 +2254,10 @@ define amdgpu_kernel void @test_call_external_v2f16_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v2f16_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0 ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <2 x half> @external_v2f16_func_void() - store volatile <2 x half> %val, <2 x half> addrspace(1)* undef + store volatile <2 x half> %val, ptr addrspace(1) undef ret void } @@ -2316,10 +2316,10 @@ define amdgpu_kernel void @test_call_external_v3f16_func_void() #0 { ; GCN-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (volatile store (<3 x s16>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x half> @external_v3f16_func_void() - store volatile <3 x half> %val, <3 x half> addrspace(1)* undef + store volatile <3 x half> %val, ptr addrspace(1) undef ret void } @@ -2376,10 +2376,10 @@ define amdgpu_kernel void @test_call_external_v4f16_func_void() #0 { ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GCN-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY21]](<2 x s16>), [[COPY22]](<2 x s16>) ; GCN-NEXT: 
ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (volatile store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <4 x half> @external_v4f16_func_void() - store volatile <4 x half> %val, <4 x half> addrspace(1)* undef + store volatile <4 x half> %val, ptr addrspace(1) undef ret void } @@ -2437,10 +2437,10 @@ define amdgpu_kernel void @test_call_external_v3f32_func_void() #0 { ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (volatile store (<3 x s32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <3 x float> @external_v3f32_func_void() - store volatile <3 x float> %val, <3 x float> addrspace(1)* undef + store volatile <3 x float> %val, ptr addrspace(1) undef ret void } @@ -2500,10 +2500,10 @@ define amdgpu_kernel void @test_call_external_v5f32_func_void() #0 { ; GCN-NEXT: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GCN-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `<5 x float> addrspace(1)* undef`, align 32, addrspace 1) + ; GCN-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (volatile store (<5 x s32>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <5 x float> @external_v5f32_func_void() - store volatile <5 x float> %val, <5 x float> addrspace(1)* undef + store volatile <5 x float> %val, ptr addrspace(1) undef ret void } @@ -2524,53 +2524,52 @@ define amdgpu_kernel void @test_call_external_i32_i64_func_void() #0 { ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_i64_func_void - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = 
COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_i64_func_void, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY23]](s32), [[COPY24]](s32) + ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY10]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call { i32, i64 } 
@external_i32_i64_func_void() %val.0 = extractvalue { i32, i64 } %val, 0 %val.1 = extractvalue { i32, i64 } %val, 1 - store volatile i32 %val.0, i32 addrspace(1)* undef - store volatile i64 %val.1, i64 addrspace(1)* undef + store volatile i32 %val.0, ptr addrspace(1) undef + store volatile i64 %val.1, ptr addrspace(1) undef ret void } @@ -2578,25 +2577,24 @@ define amdgpu_gfx void @test_gfx_call_external_i32_i64_func_void() #0 { ; GCN-LABEL: name: test_gfx_call_external_i32_i64_func_void ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_gfx_i32_i64_func_void - ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY1]](<4 x s32>) + ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY]](<4 x s32>) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_i32_i64_func_void, csr_amdgpu_si_gfx, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2 - ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GCN-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) + ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GCN-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GCN-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GCN-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY2]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[MV]](s64), [[COPY]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: SI_RETURN %val = call amdgpu_gfx { i32, i64 } @external_gfx_i32_i64_func_void() %val.0 = extractvalue { i32, i64 } %val, 0 %val.1 = extractvalue { i32, i64 } %val, 1 - store volatile i32 %val.0, i32 addrspace(1)* undef - store volatile i64 %val.1, i64 addrspace(1)* undef + store volatile i32 %val.0, ptr addrspace(1) undef + store volatile i64 %val.1, ptr addrspace(1) undef ret void } @@ -2652,14 +2650,14 @@ define amdgpu_kernel void @test_call_external_a2i32_func_void() #0 { ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GCN-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY21]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[COPY22]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call [2 x i32] @external_a2i32_func_void() %val.0 = extractvalue [2 x i32] %val, 0 %val.1 = extractvalue [2 x i32] %val, 1 - store 
volatile i32 %val.0, i32 addrspace(1)* undef - store volatile i32 %val.1, i32 addrspace(1)* undef + store volatile i32 %val.0, ptr addrspace(1) undef + store volatile i32 %val.1, ptr addrspace(1) undef ret void } @@ -2728,11 +2726,11 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 { ; GCN-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[COPY25]](s32) ; GCN-NEXT: [[TRUNC9:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC8]](s16) ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc - ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[TRUNC3]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[TRUNC5]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[TRUNC7]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[TRUNC9]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC3]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC5]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC7]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[TRUNC9]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call [5 x i8] @external_a5i8_func_void() %val.0 = extractvalue [5 x i8] %val, 0 @@ -2740,11 +2738,11 @@ define amdgpu_kernel void @test_call_external_a5i8_func_void() #0 { %val.2 = extractvalue [5 x i8] %val, 2 %val.3 = extractvalue [5 x i8] %val, 3 %val.4 = extractvalue [5 x i8] %val, 4 - store volatile i8 %val.0, i8 addrspace(1)* undef - store volatile i8 %val.1, i8 addrspace(1)* undef - store volatile i8 %val.2, i8 addrspace(1)* undef - store volatile i8 %val.3, i8 addrspace(1)* undef - store volatile i8 %val.4, i8 addrspace(1)* undef + store volatile i8 %val.0, ptr addrspace(1) undef + store volatile i8 %val.1, ptr addrspace(1) undef + store volatile i8 %val.2, ptr addrspace(1) undef + store volatile i8 %val.3, ptr addrspace(1) undef + store volatile i8 %val.4, ptr addrspace(1) undef ret void } @@ -2764,39 +2762,38 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 { ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v32i32_i32_func_void - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - 
; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v32i32_i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 @@ -2805,14 +2802,14 @@ define amdgpu_kernel void @test_call_external_v32i32_i32_func_void() #0 { ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (load (s32) from %stack.0, align 128, addrspace 5) - ; GCN-NEXT: G_STORE [[LOAD]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) - ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) 
undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call { <32 x i32>, i32 } @external_v32i32_i32_func_void() %val0 = extractvalue { <32 x i32>, i32 } %val, 0 %val1 = extractvalue { <32 x i32>, i32 } %val, 1 - store volatile <32 x i32> %val0, <32 x i32> addrspace(1)* undef, align 8 - store volatile i32 %val1, i32 addrspace(1)* undef + store volatile <32 x i32> %val0, ptr addrspace(1) undef, align 8 + store volatile i32 %val1, ptr addrspace(1) undef ret void } @@ -2832,39 +2829,38 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 { ; GCN-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_i32_v32i32_func_void - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX]](p5) - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: 
$sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_i32_v32i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 @@ -2873,14 +2869,14 @@ define amdgpu_kernel void @test_call_external_i32_v32i32_func_void() #0 { ; GCN-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C3]](s32) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[PTR_ADD1]](p5) :: (load (<32 x s32>) from %stack.0, addrspace 5) - ; GCN-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[COPY10]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD1]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call { i32, <32 x i32> } @external_i32_v32i32_func_void() %val0 = extractvalue { i32, <32 x i32> } %val, 0 %val1 = extractvalue { i32, <32 x i32> } %val, 1 - store volatile i32 %val0, i32 addrspace(1)* undef - store volatile <32 x i32> %val1, <32 x i32> addrspace(1)* undef, align 8 + store volatile i32 %val0, ptr addrspace(1) undef + store volatile <32 x i32> %val1, ptr addrspace(1) undef, align 8 ret void } @@ -2937,14 +2933,14 @@ define amdgpu_kernel void @test_call_external_v33i32_func_void() #0 { ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_void, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5) - ; GCN-NEXT: G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %val = call <33 x i32> @external_v33i32_func_void() - store volatile <33 x i32> %val, <33 x i32> addrspace(1)* undef, align 8 + store volatile <33 x i32> %val, ptr addrspace(1) undef, align 8 ret void } -define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) #0 { +define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr 
addrspace(1) %p, i32 %idx) #0 { ; GCN-LABEL: name: test_call_external_v33i32_func_v33i32_i32 ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -2961,10 +2957,10 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32> ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset.cast, align 16, addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset1, align 16, addrspace 4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset.cast, align 8, addrspace 4) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset, align 8, addrspace 4) ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32 @@ -3006,10 +3002,10 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(<33 x i32> ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_v33i32_func_v33i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; GCN-NEXT: [[LOAD2:%[0-9]+]]:_(<33 x s32>) = G_LOAD [[FRAME_INDEX]](p5) :: (load (<33 x s32>) from %stack.0, align 256, addrspace 5) - ; GCN-NEXT: G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 8, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD2]](<33 x s32>), [[DEF]](p1) :: (volatile store (<33 x s32>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 - %val = call <33 x i32> @external_v33i32_func_v33i32_i32(<33 x i32> addrspace(1)* %p, i32 %idx) - store volatile <33 x i32> %val, <33 x i32> addrspace(1)* undef, align 8 + %val = call <33 x i32> @external_v33i32_func_v33i32_i32(ptr addrspace(1) %p, i32 %idx) + store volatile <33 x i32> %val, ptr addrspace(1) undef, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll index b32e8211f05e2..8fb54005c07f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=irtranslator -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }), { i8, i32 } addrspace(5)* byval({ i8, i32 })) #0 
+declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0 define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 { ; GCN-LABEL: name: test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 @@ -21,34 +21,33 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 3 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GCN-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.in.val ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1.out.val ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32) - ; GCN-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.in.gep01, addrspace 5) + ; GCN-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.in.val, addrspace 5) ; GCN-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.in.gep1, addrspace 5) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 - ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C3]](s64) - ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C3]](s64) + ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; GCN-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; GCN-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; GCN-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; GCN-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; GCN-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C4]](s32) - ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; GCN-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C4]](s32) + ; GCN-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] + ; GCN-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; GCN-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C5]](s32) + ; GCN-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C5]](s32) ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = 
G_AMDGPU_WAVE_ADDRESS $sp_reg ; GCN-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -56,37 +55,37 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; GCN-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX1]](p5) - ; GCN-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; GCN-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) - ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; GCN-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; GCN-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; GCN-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; GCN-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; GCN-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; GCN-NEXT: $sgpr14 = COPY [[COPY16]](s32) ; GCN-NEXT: $sgpr15 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; GCN-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.gep02, addrspace 5) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.val, addrspace 5) ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (dereferenceable load (s32) from %ir.out.gep1, addrspace 5) - ; GCN-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[COPY10]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 %in.val = alloca { i8, i32 }, align 4, addrspace(5) %out.val = alloca { i8, i32 }, align 4, addrspace(5) - %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 0 - %in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 1 - store i8 3, i8 addrspace(5)* %in.gep0 - store i32 8, i32 addrspace(5)* %in.gep1 - call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %out.val, { i8, i32 } addrspace(5)* %in.val) - %out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 0 - %out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, 
i32 0, i32 1 - %out.val0 = load i8, i8 addrspace(5)* %out.gep0 - %out.val1 = load i32, i32 addrspace(5)* %out.gep1 - store volatile i8 %out.val0, i8 addrspace(1)* undef - store volatile i32 %out.val1, i32 addrspace(1)* undef + %in.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 0 + %in.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %in.val, i32 0, i32 1 + store i8 3, ptr addrspace(5) %in.gep0 + store i32 8, ptr addrspace(5) %in.gep1 + call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) %out.val, ptr addrspace(5) %in.val) + %out.gep0 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 0 + %out.gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %out.val, i32 0, i32 1 + %out.val0 = load i8, ptr addrspace(5) %out.gep0 + %out.val1 = load i32, ptr addrspace(5) %out.gep1 + store volatile i8 %out.val0, ptr addrspace(1) undef + store volatile i32 %out.val1, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index 0948411064af5..b77b6609b0f8a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -29,8 +29,8 @@ declare hidden void @external_void_func_i48(i48) #0 declare hidden void @external_void_func_i48_signext(i48 signext) #0 declare hidden void @external_void_func_i48_zeroext(i48 zeroext) #0 -declare hidden void @external_void_func_p0(i8*) #0 -declare hidden void @external_void_func_v2p0(<2 x i8*>) #0 +declare hidden void @external_void_func_p0(ptr) #0 +declare hidden void @external_void_func_v2p0(<2 x ptr>) #0 declare hidden void @external_void_func_f16(half) #0 declare hidden void @external_void_func_f32(float) #0 @@ -62,13 +62,13 @@ declare hidden void @external_void_func_v8i32(<8 x i32>) #0 declare hidden void @external_void_func_v16i32(<16 x i32>) #0 declare hidden void @external_void_func_v32i32(<32 x i32>) #0 declare hidden void @external_void_func_v32i32_i32(<32 x i32>, i32) #0 -declare hidden void @external_void_func_v32i32_p3_p5(<32 x i32>, i8 addrspace(3)*, i8 addrspace(5)*) #0 +declare hidden void @external_void_func_v32i32_p3_p5(<32 x i32>, ptr addrspace(3), ptr addrspace(5)) #0 declare hidden void @external_void_func_v32i32_i8_i8_i16(<32 x i32>, i8, i8, i16) #0 ; Structs declare hidden void @external_void_func_struct_i8_i32({ i8, i32 }) #0 -declare hidden void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 })) #0 -declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }), { i8, i32 } addrspace(5)* byval({ i8, i32 })) #0 +declare hidden void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 })) #0 +declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }), ptr addrspace(5) byval({ i8, i32 })) #0 declare hidden void @external_void_func_v2i8(<2 x i8>) #0 declare hidden void @external_void_func_v3i8(<3 x i8>) #0 @@ -76,7 +76,7 @@ declare hidden void @external_void_func_v4i8(<4 x i8>) #0 declare hidden void @external_void_func_v8i8(<8 x i8>) #0 declare hidden void @external_void_func_v16i8(<16 x i8>) #0 -declare hidden void @byval_align16_f64_arg(<32 x i32>, double addrspace(5)* byval(double) align 16) #0 +declare hidden void @byval_align16_f64_arg(<32 x i32>, ptr addrspace(5) byval(double) align 16) #0 declare 
hidden void @stack_passed_f64_arg(<32 x i32>, double) #0 declare hidden void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0 @@ -391,7 +391,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_signext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -429,7 +429,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i1, i1 addrspace(1)* undef + %var = load volatile i1, ptr addrspace(1) undef call void @external_void_func_i1_signext(i1 signext %var) ret void } @@ -451,7 +451,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s1) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s1) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i1_zeroext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -489,7 +489,7 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i1_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i1, i1 addrspace(1)* undef + %var = load volatile i1, ptr addrspace(1) undef call void @external_void_func_i1_zeroext(i1 zeroext %var) ret void } @@ -570,7 +570,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile 
"amdgpu-noclobber" load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_signext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -609,7 +609,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_signext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i8, i8 addrspace(1)* undef + %var = load volatile i8, ptr addrspace(1) undef call void @external_void_func_i8_signext(i8 signext %var) ret void } @@ -631,7 +631,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i8_zeroext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -670,7 +670,7 @@ define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i8_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i8, i8 addrspace(1)* undef + %var = load volatile i8, ptr addrspace(1) undef call void @external_void_func_i8_zeroext(i8 zeroext %var) ret void } @@ -749,7 +749,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_signext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -787,7 +787,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_signext, csr_amdgpu, implicit 
$vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i16, i16 addrspace(1)* undef + %var = load volatile i16, ptr addrspace(1) undef call void @external_void_func_i16_signext(i16 signext %var) ret void } @@ -809,7 +809,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s16) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i16_zeroext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -847,7 +847,7 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i16_zeroext, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i16, i16 addrspace(1)* undef + %var = load volatile i16, ptr addrspace(1) undef call void @external_void_func_i16_zeroext(i16 zeroext %var) ret void } @@ -1021,7 +1021,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i64 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1062,7 +1062,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <2 x i64>, <2 x i64> addrspace(1)* null + %val = load <2 x i64>, ptr addrspace(1) null call void @external_void_func_v2i64(<2 x i64> %val) ret void } @@ -1146,7 +1146,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = 
COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1186,7 +1186,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i48, i48 addrspace(1)* undef + %var = load volatile i48, ptr addrspace(1) undef call void @external_void_func_i48(i48 %var) ret void } @@ -1208,7 +1208,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_signext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1248,7 +1248,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_signext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_signext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i48, i48 addrspace(1)* undef + %var = load volatile i48, ptr addrspace(1) undef call void @external_void_func_i48_signext(i48 signext %var) ret void } @@ -1270,7 +1270,7 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (volatile "amdgpu-noclobber" load (s48) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: 
[[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_i48_zeroext ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1310,12 +1310,12 @@ define amdgpu_kernel void @test_call_external_void_func_i48_zeroext(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_i48_zeroext, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %var = load volatile i48, i48 addrspace(1)* undef + %var = load volatile i48, ptr addrspace(1) undef call void @external_void_func_i48_zeroext(i48 zeroext %var) ret void } -define amdgpu_kernel void @test_call_external_void_func_p0_imm(i8* %arg) #0 { +define amdgpu_kernel void @test_call_external_void_func_p0_imm(ptr %arg) #0 { ; CHECK-LABEL: name: test_call_external_void_func_p0_imm ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -1331,7 +1331,7 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(i8* %arg) #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.arg.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_p0 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1370,7 +1370,7 @@ define amdgpu_kernel void @test_call_external_void_func_p0_imm(i8* %arg) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_p0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - call void @external_void_func_p0(i8* %arg) + call void @external_void_func_p0(ptr %arg) ret void } @@ -1390,7 +1390,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x p0>) from `<2 x i8*> addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p0>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x p0>) from `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2p0 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -1431,8 +1431,8 @@ define amdgpu_kernel void @test_call_external_void_func_v2p0() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2p0, 
csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <2 x i8*>, <2 x i8*> addrspace(1)* null - call void @external_void_func_v2p0(<2 x i8*> %val) + %val = load <2 x ptr>, ptr addrspace(1) null + call void @external_void_func_v2p0(<2 x ptr> %val) ret void } @@ -1455,7 +1455,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[DEF]](s64) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<3 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i64 @@ -1499,7 +1499,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %load = load <2 x i64>, <2 x i64> addrspace(1)* null + %load = load <2 x i64>, ptr addrspace(1) null %val = shufflevector <2 x i64> %load, <2 x i64> , <3 x i32> call void @external_void_func_v3i64(<3 x i64> %val) @@ -1525,7 +1525,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8589934593 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 17179869187 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C2]](s64) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `<2 x i64> addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s64>) = G_LOAD [[C]](p1) :: ("amdgpu-noclobber" load (<2 x s64>) from `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s64>) = G_SHUFFLE_VECTOR [[LOAD]](<2 x s64>), [[BUILD_VECTOR]], shufflemask(0, 1, 2, 3) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i64 @@ -1571,7 +1571,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i64, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, 
implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %load = load <2 x i64>, <2 x i64> addrspace(1)* null + %load = load <2 x i64>, ptr addrspace(1) null %val = shufflevector <2 x i64> %load, <2 x i64> , <4 x i32> call void @external_void_func_v4i64(<4 x i64> %val) ret void @@ -2079,7 +2079,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2116,7 +2116,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <2 x i16>, <2 x i16> addrspace(1)* undef + %val = load <2 x i16>, ptr addrspace(1) undef call void @external_void_func_v2i16(<2 x i16> %val) ret void } @@ -2137,7 +2137,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2179,7 +2179,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <3 x i16>, <3 x i16> addrspace(1)* undef + %val = load <3 x i16>, ptr addrspace(1) undef call void @external_void_func_v3i16(<3 x i16> %val) ret void } @@ -2200,7 +2200,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = 
G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<3 x s16>) from `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3f16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2242,7 +2242,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3f16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <3 x half>, <3 x half> addrspace(1)* undef + %val = load <3 x half>, ptr addrspace(1) undef call void @external_void_func_v3f16(<3 x half> %val) ret void } @@ -2263,7 +2263,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s16>) from `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2302,7 +2302,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <4 x i16>, <4 x i16> addrspace(1)* undef + %val = load <4 x i16>, ptr addrspace(1) undef call void @external_void_func_v4i16(<4 x i16> %val) ret void } @@ -2385,7 +2385,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<5 x s16>) from `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<5 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<5 x s16>) from `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v5i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2428,7 +2428,7 @@ define amdgpu_kernel void @test_call_external_void_func_v5i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = 
G_SI_CALL [[GV]](p0), @external_void_func_v5i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <5 x i16>, <5 x i16> addrspace(1)* undef + %val = load <5 x i16>, ptr addrspace(1) undef call void @external_void_func_v5i16(<5 x i16> %val) ret void } @@ -2449,7 +2449,7 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<7 x s16>) from `<7 x i16> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<7 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<7 x s16>) from `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v7i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2493,7 +2493,7 @@ define amdgpu_kernel void @test_call_external_void_func_v7i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v7i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <7 x i16>, <7 x i16> addrspace(1)* undef + %val = load <7 x i16>, ptr addrspace(1) undef call void @external_void_func_v7i16(<7 x i16> %val) ret void } @@ -2514,7 +2514,7 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<63 x s16>) from `<63 x i16> addrspace(1)* undef`, align 128, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<63 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<63 x s16>) from `ptr addrspace(1) undef`, align 128, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v63i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2589,7 +2589,7 @@ define amdgpu_kernel void @test_call_external_void_func_v63i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v63i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, 
implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <63 x i16>, <63 x i16> addrspace(1)* undef + %val = load <63 x i16>, ptr addrspace(1) undef call void @external_void_func_v63i16(<63 x i16> %val) ret void } @@ -2610,7 +2610,7 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<65 x s16>) from `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<65 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<65 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v65i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2688,7 +2688,7 @@ define amdgpu_kernel void @test_call_external_void_func_v65i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v65i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <65 x i16>, <65 x i16> addrspace(1)* undef + %val = load <65 x i16>, ptr addrspace(1) undef call void @external_void_func_v65i16(<65 x i16> %val) ret void } @@ -2709,7 +2709,7 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<66 x s16>) from `<66 x i16> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<66 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<66 x s16>) from `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v66i16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2784,7 +2784,7 @@ define amdgpu_kernel void @test_call_external_void_func_v66i16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v66i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, 
implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <66 x i16>, <66 x i16> addrspace(1)* undef + %val = load <66 x i16>, ptr addrspace(1) undef call void @external_void_func_v66i16(<66 x i16> %val) ret void } @@ -2805,7 +2805,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s16>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2f16 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2842,7 +2842,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2f16, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <2 x half>, <2 x half> addrspace(1)* undef + %val = load <2 x half>, ptr addrspace(1) undef call void @external_void_func_v2f16(<2 x half> %val) ret void } @@ -2863,7 +2863,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s32>) from `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<2 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -2902,7 +2902,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; 
CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <2 x i32>, <2 x i32> addrspace(1)* undef + %val = load <2 x i32>, ptr addrspace(1) undef call void @external_void_func_v2i32(<2 x i32> %val) ret void } @@ -3111,7 +3111,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s32>) from `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[DEF]](p1) :: ("amdgpu-noclobber" load (<4 x s32>) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3152,7 +3152,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %val = load <4 x i32>, <4 x i32> addrspace(1)* undef + %val = load <4 x i32>, ptr addrspace(1) undef call void @external_void_func_v4i32(<4 x i32> %val) ret void } @@ -3303,7 +3303,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<8 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i32 @@ -3349,8 +3349,8 @@ define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef - %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <8 x i32>, ptr addrspace(1) %ptr call void @external_void_func_v8i32(<8 x i32> %val) ret void } @@ -3443,7 +3443,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; CHECK-NEXT: 
[[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<16 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i32 @@ -3497,8 +3497,8 @@ define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef - %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i32>, ptr addrspace(1) %ptr call void @external_void_func_v16i32(<16 x i32> %val) ret void } @@ -3519,7 +3519,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32 @@ -3592,8 +3592,8 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: 
ADJCALLSTACKDOWN 0, 4, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef - %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <32 x i32>, ptr addrspace(1) %ptr call void @external_void_func_v32i32(<32 x i32> %val) ret void } @@ -3616,9 +3616,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s32) from `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s32) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i32 ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] @@ -3693,9 +3693,9 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef - %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0 - %val1 = load i32, i32 addrspace(1)* undef + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val0 = load <32 x i32>, ptr addrspace(1) %ptr0 + %val1 = load i32, ptr addrspace(1) undef call void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1) ret void } @@ -3717,31 +3717,30 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD 
[[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s8) from `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY10]](p1) :: ("amdgpu-noclobber" load (s16) from `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), 
[[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -3752,10 +3751,10 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) ; CHECK-NEXT: G_STORE [[ANYEXT]](s16), [[PTR_ADD2]](p5) :: (store (s16) into stack + 4, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s16) = COPY [[ANYEXT]](s16) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32) - ; CHECK-NEXT: G_STORE [[COPY21]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5) + ; CHECK-NEXT: G_STORE [[COPY20]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5) @@ -3790,25 +3789,25 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY22]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_i8_i8_i16, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, 
implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 16, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef - %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0 - %val1 = load i8, i8 addrspace(1)* undef - %val2 = load i8, i8 addrspace(1)* undef - %val3 = load i16, i16 addrspace(1)* undef + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val0 = load <32 x i32>, ptr addrspace(1) %ptr0 + %val1 = load i8, ptr addrspace(1) undef + %val2 = load i8, ptr addrspace(1) undef + %val3 = load i16, ptr addrspace(1) undef call void @external_void_func_v32i32_i8_i8_i16(<32 x i32> %val0, i8 %val1, i8 %val2, i16 %val3) ret void } @@ -3830,31 +3829,30 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p1) = COPY [[DEF1]](p1) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<32 x i32> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1) - ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p3) from `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[COPY10]](p1) :: ("amdgpu-noclobber" load (p5) from `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p3) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p3) from `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(p5) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (p5) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_p3_p5 - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY8]] - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY7]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(p4) = COPY [[COPY9]](p4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY13]], [[C]](s64) - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s64) = COPY [[COPY6]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY5]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY3]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY12]], [[C]](s64) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s64) = COPY [[COPY6]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY5]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY3]] ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY 
[[COPY1]](s32) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C1]](s32) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY18]], [[SHL]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY18]], [[C1]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY17]], [[SHL]] + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY20]], [[C2]](s32) + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY19]], [[C2]](s32) ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg @@ -3898,25 +3896,25 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_p3_p5() #0 { ; CHECK-NEXT: $vgpr28 = COPY [[UV28]](s32) ; CHECK-NEXT: $vgpr29 = COPY [[UV29]](s32) ; CHECK-NEXT: $vgpr30 = COPY [[UV30]](s32) - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg - ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY21]](<4 x s32>) - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY11]](p4) - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY12]](p4) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg + ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY20]](<4 x s32>) + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY10]](p4) + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[COPY11]](p4) ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY14]](s64) - ; CHECK-NEXT: $sgpr12 = COPY [[COPY15]](s32) - ; CHECK-NEXT: $sgpr13 = COPY [[COPY16]](s32) - ; CHECK-NEXT: $sgpr14 = COPY [[COPY17]](s32) + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY13]](s64) + ; CHECK-NEXT: $sgpr12 = COPY [[COPY14]](s32) + ; CHECK-NEXT: $sgpr13 = COPY [[COPY15]](s32) + ; CHECK-NEXT: $sgpr14 = COPY [[COPY16]](s32) ; CHECK-NEXT: $sgpr15 = COPY [[DEF2]](s32) ; CHECK-NEXT: $vgpr31 = COPY [[OR1]](s32) ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v32i32_p3_p5, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, 
implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 12, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef - %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0 - %val1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* undef - %val2 = load i8 addrspace(5)*, i8 addrspace(5)* addrspace(1)* undef - call void @external_void_func_v32i32_p3_p5(<32 x i32> %val0, i8 addrspace(3)* %val1, i8 addrspace(5)* %val2) + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val0 = load <32 x i32>, ptr addrspace(1) %ptr0 + %val1 = load ptr addrspace(3), ptr addrspace(1) undef + %val2 = load ptr addrspace(5), ptr addrspace(1) undef + call void @external_void_func_v32i32_p3_p5(<32 x i32> %val0, ptr addrspace(3) %val1, ptr addrspace(5) %val2) ret void } @@ -3936,7 +3934,7 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -3980,8 +3978,8 @@ define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef - %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val = load { i8, i32 }, ptr addrspace(1) %ptr0 call void @external_void_func_struct_i8_i32({ i8, i32 } %val) ret void } @@ -3990,7 +3988,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -4006,8 +4004,8 @@ define amdgpu_gfx void 
@test_gfx_call_external_void_func_struct_i8_i32() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32, csr_amdgpu_si_gfx, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN - %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef - %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val = load { i8, i32 }, ptr addrspace(1) %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32({ i8, i32 } %val) ret void } @@ -4016,7 +4014,7 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-LABEL: name: test_gfx_call_external_void_func_struct_i8_i32_inreg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `{ i8, i32 } addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[LOAD]](p1) :: (load (s8) from %ir.ptr0, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[LOAD]], [[C]](s64) @@ -4032,8 +4030,8 @@ define amdgpu_gfx void @test_gfx_call_external_void_func_struct_i8_i32_inreg() # ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_gfx_void_func_struct_i8_i32_inreg, csr_amdgpu_si_gfx, implicit $sgpr4, implicit $sgpr5, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: SI_RETURN - %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef - %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 + %ptr0 = load ptr addrspace(1), ptr addrspace(4) undef + %val = load { i8, i32 }, ptr addrspace(1) %ptr0 call amdgpu_gfx void @external_gfx_void_func_struct_i8_i32_inreg({ i8, i32 } inreg %val) ret void } @@ -4058,7 +4056,7 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.val ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32) - ; CHECK-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.gep01, addrspace 5) + ; CHECK-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.val, addrspace 5) ; CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_byval_struct_i8_i32 @@ -4101,17 +4099,17 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 %val = alloca { i8, i32 }, align 4, addrspace(5) - %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 0 - %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1 - store i8 3, i8 addrspace(5)* %gep0 - store i32 8, i32 addrspace(5)* %gep1 - call void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %val) + %gep0 = getelementptr inbounds { i8, i32 }, ptr 
addrspace(5) %val, i32 0, i32 0 + %gep1 = getelementptr inbounds { i8, i32 }, ptr addrspace(5) %val, i32 0, i32 1 + store i8 3, ptr addrspace(5) %gep0 + store i32 8, ptr addrspace(5) %gep1 + call void @external_void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %val) ret void } -declare void @void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byval([3 x i32]) %arg0, i8 addrspace(5)* byval(i8) align 32 %arg1, i32 %arg2) #0 +declare void @void_func_byval_a3i32_byval_i8_align32(ptr addrspace(5) byval([3 x i32]) %arg0, ptr addrspace(5) byval(i8) align 32 %arg1, i32 %arg2) #0 -define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming0, i8 addrspace(5)* align 32 %incoming1) #0 { +define void @call_byval_3ai32_byval_i8_align32(ptr addrspace(5) %incoming0, ptr addrspace(5) align 32 %incoming1) #0 { ; CHECK-LABEL: name: call_byval_3ai32_byval_i8_align32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -4163,15 +4161,15 @@ define void @call_byval_3ai32_byval_i8_align32([3 x i32] addrspace(5)* %incoming ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a3i32_byval_i8_align32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 36, implicit-def $scc ; CHECK-NEXT: SI_RETURN - call void @void_func_byval_a3i32_byval_i8_align32([3 x i32] addrspace(5)* byval([3 x i32]) %incoming0, i8 addrspace(5)* align 32 %incoming1, i32 999) + call void @void_func_byval_a3i32_byval_i8_align32(ptr addrspace(5) byval([3 x i32]) %incoming0, ptr addrspace(5) align 32 %incoming1, i32 999) ret void } -declare void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %arg0) #0 +declare void @void_func_byval_a4i64_align4(ptr addrspace(5) byval([4 x i64]) align 4 %arg0) #0 ; Make sure we are aware of the higher alignment of the incoming value ; than implied by the outgoing byval alignment in the memory operand. 
-define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* align 256 %incoming_high_align) #0 { +define void @call_byval_a4i64_align4_higher_source_align(ptr addrspace(5) align 256 %incoming_high_align) #0 { ; CHECK-LABEL: name: call_byval_a4i64_align4_higher_source_align ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -4216,7 +4214,7 @@ define void @call_byval_a4i64_align4_higher_source_align([4 x i64] addrspace(5)* ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @void_func_byval_a4i64_align4, csr_amdgpu, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 32, implicit-def $scc ; CHECK-NEXT: SI_RETURN - call void @void_func_byval_a4i64_align4([4 x i64] addrspace(5)* byval([4 x i64]) align 4 %incoming_high_align) + call void @void_func_byval_a4i64_align4(ptr addrspace(5) byval([4 x i64]) align 4 %incoming_high_align) ret void } @@ -4236,7 +4234,7 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<2 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<2 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v2i8 @@ -4280,8 +4278,8 @@ define amdgpu_kernel void @test_call_external_void_func_v2i8() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v2i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <2 x i8> addrspace(1)*, <2 x i8> addrspace(1)* addrspace(4)* undef - %val = load <2 x i8>, <2 x i8> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <2 x i8>, ptr addrspace(1) %ptr call void @external_void_func_v2i8(<2 x i8> %val) ret void } @@ -4302,7 +4300,7 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<3 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<3 x s8>) from %ir.ptr, align 4, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: 
[[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v3i8 @@ -4349,8 +4347,8 @@ define amdgpu_kernel void @test_call_external_void_func_v3i8() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v3i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <3 x i8> addrspace(1)*, <3 x i8> addrspace(1)* addrspace(4)* undef - %val = load <3 x i8>, <3 x i8> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <3 x i8>, ptr addrspace(1) %ptr call void @external_void_func_v3i8(<3 x i8> %val) ret void } @@ -4371,7 +4369,7 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<4 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<4 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v4i8 @@ -4421,8 +4419,8 @@ define amdgpu_kernel void @test_call_external_void_func_v4i8() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v4i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef - %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <4 x i8>, ptr addrspace(1) %ptr call void @external_void_func_v4i8(<4 x i8> %val) ret void } @@ -4443,7 +4441,7 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<8 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<8 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v8i8 @@ -4505,8 +4503,8 @@ define amdgpu_kernel void @test_call_external_void_func_v8i8() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v8i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit 
$vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <8 x i8> addrspace(1)*, <8 x i8> addrspace(1)* addrspace(4)* undef - %val = load <8 x i8>, <8 x i8> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <8 x i8>, ptr addrspace(1) %ptr call void @external_void_func_v8i8(<8 x i8> %val) ret void } @@ -4527,7 +4525,7 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `<16 x i8> addrspace(1)* addrspace(4)* undef`, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<16 x s8>) from %ir.ptr, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v16i8 @@ -4613,8 +4611,8 @@ define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @external_void_func_v16i8, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc ; CHECK-NEXT: S_ENDPGM 0 - %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef - %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr + %ptr = load ptr addrspace(1), ptr addrspace(4) undef + %val = load <16 x i8>, ptr addrspace(1) %ptr call void @external_void_func_v16i8(<16 x i8> %val) ret void } @@ -4635,10 +4633,10 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD 
[[PTR_ADD]](p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll index 44bf8ce235d6f..1d5490bec296f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constant-fold-vector-op.ll @@ -12,10 +12,10 @@ define amdgpu_kernel void @constant_fold_vector_add() { ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64), [[C2]](s64), [[C2]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s64>), [[C1]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s64>), [[C1]](p1) :: (store (<4 x s64>) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 entry: %add = add <4 x i64> zeroinitializer, zeroinitializer - store <4 x i64> %add, <4 x i64> addrspace(1)* null, align 32 + store <4 x i64> %add, ptr addrspace(1) null, align 32 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll index 138e66ffd6c7a..6a0975f7fa72e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constantexpr.ll @@ -17,7 +17,7 @@ define i32 @test() { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[COPY3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - ret i32 bitcast (<1 x i32> bitcast (i32 zext (i1 icmp eq (i32* @var, i32* inttoptr (i32 -1 to i32*)) to i32) to <1 x i32>), i64 0)> to i32) + ret i32 bitcast (<1 x i32> bitcast (i32 zext (i1 icmp eq (ptr @var, ptr inttoptr (i32 -1 to ptr)) to i32) to <1 x i32>), i64 0)> to i32) } @gint = external addrspace(1) global i8, align 4 @@ -34,9 +34,9 @@ define amdgpu_kernel void @constantexpr_select_0() { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - store i32 select (i1 icmp eq (i8 addrspace(1)* @gint, i8 addrspace(1)* null), i32 1, i32 0), i32 addrspace(1)* undef, align 4 + store i32 select (i1 icmp eq (ptr addrspace(1) @gint, ptr addrspace(1) null), i32 1, i32 0), ptr addrspace(1) undef, align 4 ret void } @@ -51,9 +51,9 @@ define amdgpu_kernel void @constantexpr_select_1() { ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP]](s1), [[C1]], [[C2]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[SELECT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, 
addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 - store i32 select (i1 icmp eq (i8 addrspace(1)* @gint, i8 addrspace(1)* inttoptr (i64 1024 to i8 addrspace(1)*)), i32 1, i32 0), i32 addrspace(1)* undef, align 4 + store i32 select (i1 icmp eq (ptr addrspace(1) @gint, ptr addrspace(1) inttoptr (i64 1024 to ptr addrspace(1))), i32 1, i32 0), ptr addrspace(1) undef, align 4 ret void } @@ -74,5 +74,5 @@ define i32 @test_fcmp_constexpr() { ; CHECK-NEXT: $vgpr0 = COPY [[ZEXT]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 entry: - ret i32 zext (i1 fcmp oeq (float uitofp (i1 icmp eq (i32* getelementptr inbounds ([2 x i32], [2 x i32]* @a, i64 0, i64 1), i32* @var) to float), float 0.000000e+00) to i32) + ret i32 zext (i1 fcmp oeq (float uitofp (i1 icmp eq (ptr getelementptr inbounds ([2 x i32], ptr @a, i64 0, i64 1), ptr @var) to float), float 0.000000e+00) to i32) } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll index a3ffff28a9d8e..ccd186e11c44f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll @@ -5,7 +5,7 @@ ; CHECK: nnan G_FADD define amdgpu_kernel void @fadd_nnan(float %arg0, float %arg1) { %res = fadd nnan float %arg0, %arg1 - store float %res, float addrspace(1)* undef + store float %res, ptr addrspace(1) undef ret void } @@ -14,7 +14,7 @@ define amdgpu_kernel void @fadd_nnan(float %arg0, float %arg1) { ; CHECK: nnan ninf nsz arcp contract afn reassoc G_FMA define amdgpu_kernel void @fma_fast(float %arg0, float %arg1, float %arg2) { %res = call fast float @llvm.fma.f32(float %arg0, float %arg1, float %arg2) - store float %res, float addrspace(1)* undef + store float %res, ptr addrspace(1) undef ret void } @@ -23,7 +23,7 @@ define amdgpu_kernel void @fma_fast(float %arg0, float %arg1, float %arg2) { ; CHECK: = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %{{[0-9]+}}(s32) define amdgpu_kernel void @rcp_nsz(float %arg0) { %res = call nsz float @llvm.amdgcn.rcp.f32 (float %arg0) - store float %res, float addrspace(1)* undef + store float %res, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll index af0dd868c9092..34d59ea1e608b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fixed-function-abi-vgpr-args.ll @@ -43,8 +43,8 @@ define void @void_a31i32_i32([31 x i32] %arg0, i32 %arg1) { ; FIXED-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; FIXED-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; FIXED-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; FIXED-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; FIXED-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; FIXED-NEXT: SI_RETURN - store i32 %arg1, i32 addrspace(1)* undef + store i32 %arg1, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index 8adc0ac7009b6..34860d46d832e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -12,9 +12,9 @@ define void @void_func_empty_arg({} %arg0, i32 %arg1) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i32 %arg1, i32 addrspace(1)* undef + store i32 %arg1, ptr addrspace(1) undef ret void } @@ -25,9 +25,9 @@ define void @void_func_empty_array([0 x i8] %arg0, i32 %arg1) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i32 %arg1, i32 addrspace(1)* undef + store i32 %arg1, ptr addrspace(1) undef ret void } @@ -39,9 +39,9 @@ define void @void_func_i1(i1 %arg0) #0 { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `i1 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (store (s1) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i1 %arg0, i1 addrspace(1)* undef + store i1 %arg0, ptr addrspace(1) undef ret void } @@ -57,11 +57,11 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = zext i1 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -77,11 +77,11 @@ define void @void_func_i1_signext(i1 signext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s1) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = sext i1 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -104,7 +104,7 @@ define void @i1_arg_i1_use(i1 %arg) #0 { ; CHECK-NEXT: bb.2.bb1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[C1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: G_BR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.bb2: @@ -114,7 +114,7 @@ bb: br i1 %arg, label %bb2, label %bb1 bb1: - store volatile 
i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb2 bb2: @@ -130,9 +130,9 @@ define void @void_func_i8(i8 %arg0) #0 { ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `i8 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i8 %arg0, i8 addrspace(1)* undef + store i8 %arg0, ptr addrspace(1) undef ret void } @@ -148,11 +148,11 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = zext i8 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -168,11 +168,11 @@ define void @void_func_i8_signext(i8 signext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = sext i8 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -184,9 +184,9 @@ define void @void_func_i16(i16 %arg0) #0 { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `i16 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i16 %arg0, i16 addrspace(1)* undef + store i16 %arg0, ptr addrspace(1) undef ret void } @@ -202,11 +202,11 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = zext i16 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -222,11 +222,11 @@ define void @void_func_i16_signext(i16 signext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[SEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, 
addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = sext i16 %arg0 to i32 %add = add i32 %ext, 12 - store i32 %add, i32 addrspace(1)* undef + store i32 %add, ptr addrspace(1) undef ret void } @@ -238,9 +238,9 @@ define void @void_func_i24(i24 %arg0) #0 { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i24 %arg0, i24 addrspace(1)* undef + store i24 %arg0, ptr addrspace(1) undef ret void } @@ -253,9 +253,9 @@ define void @void_func_i24_zeroext(i24 zeroext %arg0) #0 { ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s32) = G_ASSERT_ZEXT [[COPY]], 24 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_ZEXT]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i24 %arg0, i24 addrspace(1)* undef + store i24 %arg0, ptr addrspace(1) undef ret void } @@ -268,9 +268,9 @@ define void @void_func_i24_signext(i24 signext %arg0) #0 { ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 24 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s24) = G_TRUNC [[ASSERT_SEXT]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `i24 addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s24), [[DEF]](p1) :: (store (s24) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i24 %arg0, i24 addrspace(1)* undef + store i24 %arg0, ptr addrspace(1) undef ret void } @@ -281,9 +281,9 @@ define void @void_func_i32(i32 %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i32 %arg0, i32 addrspace(1)* undef + store i32 %arg0, ptr addrspace(1) undef ret void } @@ -295,9 +295,9 @@ define void @void_func_i32_signext(i32 signext %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i32 %arg0, i32 addrspace(1)* undef + store i32 %arg0, ptr addrspace(1) undef ret void } @@ -309,22 +309,22 @@ define void @void_func_i32_zeroext(i32 zeroext %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, 
addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i32 %arg0, i32 addrspace(1)* undef + store i32 %arg0, ptr addrspace(1) undef ret void } -define void @void_func_p3i8(i8 addrspace(3)* %arg0) #0 { +define void @void_func_p3i8(ptr addrspace(3) %arg0) #0 { ; CHECK-LABEL: name: void_func_p3i8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](p3), [[DEF]](p1) :: (store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](p3), [[DEF]](p1) :: (store (p3) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i8 addrspace(3)* %arg0, i8 addrspace(3)* addrspace(1)* undef + store ptr addrspace(3) %arg0, ptr addrspace(1) undef ret void } @@ -338,9 +338,9 @@ define void @void_func_i48(i48 %arg0) #0 { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[MV]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `i48 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s48), [[DEF]](p1) :: (store (s48) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i48 %arg0, i48 addrspace(1)* undef + store i48 %arg0, ptr addrspace(1) undef ret void } @@ -357,11 +357,11 @@ define void @void_func_i48_zeroext(i48 zeroext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[TRUNC]](s48) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[ZEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = zext i48 %arg0 to i64 %add = add i64 %ext, 12 - store i64 %add, i64 addrspace(1)* undef + store i64 %add, ptr addrspace(1) undef ret void } @@ -378,11 +378,11 @@ define void @void_func_i48_signext(i48 signext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s48) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[SEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = sext i48 %arg0 to i64 %add = add i64 %ext, 12 - store i64 %add, i64 addrspace(1)* undef + store i64 %add, ptr addrspace(1) undef ret void } @@ -395,9 +395,9 @@ define void @void_func_i64(i64 %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i64 %arg0, i64 addrspace(1)* undef + store i64 %arg0, ptr addrspace(1) undef ret void } @@ -412,9 +412,9 @@ define void @void_func_i95(i95 %arg0) #0 { ; 
CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s95) = G_TRUNC [[MV]](s96) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `i95 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s95), [[DEF]](p1) :: (store (s95) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i95 %arg0, i95 addrspace(1)* undef + store i95 %arg0, ptr addrspace(1) undef ret void } @@ -432,11 +432,11 @@ define void @void_func_i95_zeroext(i95 zeroext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[TRUNC]](s95) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[ZEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = zext i95 %arg0 to i96 %add = add i96 %ext, 12 - store i96 %add, i96 addrspace(1)* undef + store i96 %add, ptr addrspace(1) undef ret void } @@ -454,11 +454,11 @@ define void @void_func_i95_signext(i95 signext %arg0) #0 { ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[TRUNC]](s95) ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s96) = G_ADD [[SEXT]], [[C]] - ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[ADD]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN %ext = sext i95 %arg0 to i96 %add = add i96 %ext, 12 - store i96 %add, i96 addrspace(1)* undef + store i96 %add, ptr addrspace(1) undef ret void } @@ -472,13 +472,13 @@ define void @void_func_i96(i96 %arg0) #0 { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i96 %arg0, i96 addrspace(1)* undef + store i96 %arg0, ptr addrspace(1) undef ret void } -define void @void_func_p0i8(i8* %arg0) #0 { +define void @void_func_p0i8(ptr %arg0) #0 { ; CHECK-LABEL: name: void_func_p0i8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -487,13 +487,13 @@ define void @void_func_p0i8(i8* %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `i8* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](p0), [[DEF]](p1) :: (store (p0) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i8* %arg0, i8* addrspace(1)* undef + store ptr %arg0, ptr addrspace(1) undef ret void } -define void @void_func_p1i8(i8 addrspace(1)* %arg0) #0 { +define void @void_func_p1i8(ptr addrspace(1) %arg0) #0 { ; CHECK-LABEL: name: void_func_p1i8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -502,9 +502,9 
@@ define void @void_func_p1i8(i8 addrspace(1)* %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `i8 addrspace(1)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](p1), [[DEF]](p1) :: (store (p1) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i8 addrspace(1)* %arg0, i8 addrspace(1)* addrspace(1)* undef + store ptr addrspace(1) %arg0, ptr addrspace(1) undef ret void } @@ -516,9 +516,9 @@ define void @void_func_f16(half %arg0) #0 { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[DEF]](p1) :: (store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store half %arg0, half addrspace(1)* undef + store half %arg0, ptr addrspace(1) undef ret void } @@ -529,9 +529,9 @@ define void @void_func_f32(float %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store float %arg0, float addrspace(1)* undef + store float %arg0, ptr addrspace(1) undef ret void } @@ -544,9 +544,9 @@ define void @void_func_f64(double %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `double addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store double %arg0, double addrspace(1)* undef + store double %arg0, ptr addrspace(1) undef ret void } @@ -559,9 +559,9 @@ define void @void_func_v2i32(<2 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i32> %arg0, <2 x i32> addrspace(1)* undef + store <2 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -575,9 +575,9 @@ define void @void_func_v2i24(<2 x i24> %arg0) #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<2 x s24>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: (store (<2 x s24>) into `<2 x i24> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](<2 x s24>), [[DEF]](p1) :: 
(store (<2 x s24>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i24> %arg0, <2 x i24> addrspace(1)* undef + store <2 x i24> %arg0, ptr addrspace(1) undef ret void } @@ -592,9 +592,9 @@ define void @void_func_v3i24(<3 x i24> %arg0) #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<3 x s24>) = G_TRUNC [[BUILD_VECTOR]](<3 x s32>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x s24>) into `<3 x i24> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](<3 x s24>), [[DEF]](p1) :: (store (<3 x s24>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x i24> %arg0, <3 x i24> addrspace(1)* undef + store <3 x i24> %arg0, ptr addrspace(1) undef ret void } @@ -610,9 +610,9 @@ define void @void_func_v2i8(<2 x i8> %arg0) #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<2 x s8>) = G_TRUNC [[BUILD_VECTOR]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store (<2 x s8>) into `<2 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC2]](<2 x s8>), [[DEF]](p1) :: (store (<2 x s8>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i8> %arg0, <2 x i8> addrspace(1)* undef + store <2 x i8> %arg0, ptr addrspace(1) undef ret void } @@ -630,9 +630,9 @@ define void @void_func_v3i8(<3 x i8> %arg0) #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16) ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[BUILD_VECTOR]](<3 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store (<3 x s8>) into `<3 x i8> addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC3]](<3 x s8>), [[DEF]](p1) :: (store (<3 x s8>) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x i8> %arg0, <3 x i8> addrspace(1)* undef + store <3 x i8> %arg0, ptr addrspace(1) undef ret void } @@ -652,13 +652,13 @@ define void @void_func_v4i8(<4 x i8> %arg0) #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16) ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store (<4 x s8>) into `<4 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC4]](<4 x s8>), [[DEF]](p1) :: (store (<4 x s8>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <4 x i8> %arg0, <4 x i8> addrspace(1)* undef + store <4 x i8> %arg0, ptr addrspace(1) undef ret void } -define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 { +define void @void_func_v2p3i8(<2 x ptr addrspace(3)> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2p3i8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -667,9 +667,9 @@ define void @void_func_v2p3i8(<2 x i8 addrspace(3)*> %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p3) = COPY $vgpr1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY]](p3), 
[[COPY1]](p3) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `<2 x i8 addrspace(3)*> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i8 addrspace(3)*> %arg0, <2 x i8 addrspace(3)*> addrspace(1)* undef + store <2 x ptr addrspace(3)> %arg0, ptr addrspace(1) undef ret void } @@ -683,9 +683,9 @@ define void @void_func_v3i32(<3 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x i32> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x i32> %arg0, <3 x i32> addrspace(1)* undef + store <3 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -700,9 +700,9 @@ define void @void_func_v4i32(<4 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <4 x i32> %arg0, <4 x i32> addrspace(1)* undef + store <4 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -718,9 +718,9 @@ define void @void_func_v5i32(<5 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `<5 x i32> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s32>), [[DEF]](p1) :: (store (<5 x s32>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <5 x i32> %arg0, <5 x i32> addrspace(1)* undef + store <5 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -739,9 +739,9 @@ define void @void_func_v8i32(<8 x i32> %arg0) #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <8 x i32> %arg0, <8 x i32> addrspace(1)* undef + store <8 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -768,9 +768,9 @@ define void @void_func_v16i32(<16 x i32> %arg0) 
#0 { ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <16 x i32> %arg0, <16 x i32> addrspace(1)* undef + store <16 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -814,9 +814,9 @@ define void @void_func_v32i32(<32 x i32> %arg0) #0 { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <32 x i32> %arg0, <32 x i32> addrspace(1)* undef + store <32 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -863,9 +863,9 @@ define void @void_func_v33i32(<33 x i32> %arg0) #0 { ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p5) :: (invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<33 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[LOAD]](s32), [[LOAD1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `<33 x i32> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<33 x s32>), [[DEF]](p1) :: (store (<33 x s32>) into `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <33 x i32> %arg0, <33 x i32> addrspace(1)* undef + store <33 x i32> %arg0, ptr addrspace(1) undef ret void } @@ -882,13 +882,13 @@ define void @void_func_v2i64(<2 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i64> %arg0, <2 x i64> addrspace(1)* undef + store <2 x i64> %arg0, ptr addrspace(1) undef ret void } -define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 { +define void @void_func_v2p0i8(<2 x ptr> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2p0i8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -901,13 +901,13 @@ define void @void_func_v2p0i8(<2 x i8*> %arg0) #0 { ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p0>) = G_BUILD_VECTOR [[MV]](p0), [[MV1]](p0) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `<2 x i8*> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p0>), [[DEF]](p1) :: (store (<2 x p0>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i8*> %arg0, <2 x i8*> addrspace(1)* undef + store <2 x ptr> %arg0, ptr addrspace(1) undef ret void } -define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 { +define void @void_func_v2p1i8(<2 x ptr addrspace(1)> %arg0) #0 { ; CHECK-LABEL: name: void_func_v2p1i8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -920,9 +920,9 @@ define void @void_func_v2p1i8(<2 x i8 addrspace(1)*> %arg0) #0 { ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x p1>) = G_BUILD_VECTOR [[MV]](p1), [[MV1]](p1) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<2 x i8 addrspace(1)*> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i8 addrspace(1)*> %arg0, <2 x i8 addrspace(1)*> addrspace(1)* undef + store <2 x ptr addrspace(1)> %arg0, ptr addrspace(1) undef ret void } @@ -942,9 +942,9 @@ define void @void_func_v3i64(<3 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x i64> %arg0, <3 x i64> addrspace(1)* undef + store <3 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -967,9 +967,9 @@ define void @void_func_v4i64(<4 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), 
[[MV2]](s64), [[MV3]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <4 x i64> %arg0, <4 x i64> addrspace(1)* undef + store <4 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -995,9 +995,9 @@ define void @void_func_v5i64(<5 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `<5 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s64>), [[DEF]](p1) :: (store (<5 x s64>) into `ptr addrspace(1) undef`, align 64, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <5 x i64> %arg0, <5 x i64> addrspace(1)* undef + store <5 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -1032,9 +1032,9 @@ define void @void_func_v8i64(<8 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <8 x i64> %arg0, <8 x i64> addrspace(1)* undef + store <8 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -1094,9 +1094,9 @@ define void @void_func_v16i64(<16 x i64> %arg0) #0 { ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x i64> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <16 x i64> %arg0, <16 x i64> addrspace(1)* undef + store <16 x i64> %arg0, ptr addrspace(1) undef ret void } @@ -1107,9 +1107,9 @@ define void @void_func_v2i16(<2 x i16> %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x i16> %arg0, <2 x i16> addrspace(1)* undef + store <2 x i16> %arg0, ptr 
addrspace(1) undef ret void } @@ -1124,9 +1124,9 @@ define void @void_func_v3i16(<3 x i16> %arg0) #0 { ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `<3 x i16> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x i16> %arg0, <3 x i16> addrspace(1)* undef + store <3 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -1139,9 +1139,9 @@ define void @void_func_v4i16(<4 x i16> %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <4 x i16> %arg0, <4 x i16> addrspace(1)* undef + store <4 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -1157,9 +1157,9 @@ define void @void_func_v5i16(<5 x i16> %arg0) #0 { ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<5 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s16>), [[DEF]](p1) :: (store (<5 x s16>) into `<5 x i16> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<5 x s16>), [[DEF]](p1) :: (store (<5 x s16>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <5 x i16> %arg0, <5 x i16> addrspace(1)* undef + store <5 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -1174,9 +1174,9 @@ define void @void_func_v8i16(<8 x i16> %arg0) #0 { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <8 x i16> %arg0, <8 x i16> addrspace(1)* undef + store <8 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -1195,9 +1195,9 @@ define void @void_func_v16i16(<16 x i16> %arg0) #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), 
[[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x i16> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <16 x i16> %arg0, <16 x i16> addrspace(1)* undef + store <16 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -1247,9 +1247,9 @@ define void @void_func_v65i16(<65 x i16> %arg0) #0 { ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16), [[UV8:%[0-9]+]]:_(s16), [[UV9:%[0-9]+]]:_(s16), [[UV10:%[0-9]+]]:_(s16), [[UV11:%[0-9]+]]:_(s16), [[UV12:%[0-9]+]]:_(s16), [[UV13:%[0-9]+]]:_(s16), [[UV14:%[0-9]+]]:_(s16), [[UV15:%[0-9]+]]:_(s16), [[UV16:%[0-9]+]]:_(s16), [[UV17:%[0-9]+]]:_(s16), [[UV18:%[0-9]+]]:_(s16), [[UV19:%[0-9]+]]:_(s16), [[UV20:%[0-9]+]]:_(s16), [[UV21:%[0-9]+]]:_(s16), [[UV22:%[0-9]+]]:_(s16), [[UV23:%[0-9]+]]:_(s16), [[UV24:%[0-9]+]]:_(s16), [[UV25:%[0-9]+]]:_(s16), [[UV26:%[0-9]+]]:_(s16), [[UV27:%[0-9]+]]:_(s16), [[UV28:%[0-9]+]]:_(s16), [[UV29:%[0-9]+]]:_(s16), [[UV30:%[0-9]+]]:_(s16), [[UV31:%[0-9]+]]:_(s16), [[UV32:%[0-9]+]]:_(s16), [[UV33:%[0-9]+]]:_(s16), [[UV34:%[0-9]+]]:_(s16), [[UV35:%[0-9]+]]:_(s16), [[UV36:%[0-9]+]]:_(s16), [[UV37:%[0-9]+]]:_(s16), [[UV38:%[0-9]+]]:_(s16), [[UV39:%[0-9]+]]:_(s16), [[UV40:%[0-9]+]]:_(s16), [[UV41:%[0-9]+]]:_(s16), [[UV42:%[0-9]+]]:_(s16), [[UV43:%[0-9]+]]:_(s16), [[UV44:%[0-9]+]]:_(s16), [[UV45:%[0-9]+]]:_(s16), [[UV46:%[0-9]+]]:_(s16), [[UV47:%[0-9]+]]:_(s16), [[UV48:%[0-9]+]]:_(s16), [[UV49:%[0-9]+]]:_(s16), [[UV50:%[0-9]+]]:_(s16), [[UV51:%[0-9]+]]:_(s16), [[UV52:%[0-9]+]]:_(s16), [[UV53:%[0-9]+]]:_(s16), [[UV54:%[0-9]+]]:_(s16), [[UV55:%[0-9]+]]:_(s16), [[UV56:%[0-9]+]]:_(s16), [[UV57:%[0-9]+]]:_(s16), [[UV58:%[0-9]+]]:_(s16), [[UV59:%[0-9]+]]:_(s16), [[UV60:%[0-9]+]]:_(s16), [[UV61:%[0-9]+]]:_(s16), [[UV62:%[0-9]+]]:_(s16), [[UV63:%[0-9]+]]:_(s16), [[UV64:%[0-9]+]]:_(s16), [[UV65:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<66 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<65 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16), [[UV3]](s16), [[UV4]](s16), [[UV5]](s16), [[UV6]](s16), [[UV7]](s16), [[UV8]](s16), [[UV9]](s16), [[UV10]](s16), [[UV11]](s16), [[UV12]](s16), [[UV13]](s16), [[UV14]](s16), [[UV15]](s16), [[UV16]](s16), [[UV17]](s16), [[UV18]](s16), [[UV19]](s16), [[UV20]](s16), [[UV21]](s16), [[UV22]](s16), [[UV23]](s16), [[UV24]](s16), [[UV25]](s16), [[UV26]](s16), [[UV27]](s16), [[UV28]](s16), [[UV29]](s16), [[UV30]](s16), [[UV31]](s16), [[UV32]](s16), [[UV33]](s16), [[UV34]](s16), [[UV35]](s16), [[UV36]](s16), [[UV37]](s16), [[UV38]](s16), [[UV39]](s16), [[UV40]](s16), [[UV41]](s16), [[UV42]](s16), [[UV43]](s16), [[UV44]](s16), [[UV45]](s16), [[UV46]](s16), [[UV47]](s16), [[UV48]](s16), [[UV49]](s16), [[UV50]](s16), [[UV51]](s16), [[UV52]](s16), [[UV53]](s16), [[UV54]](s16), [[UV55]](s16), [[UV56]](s16), [[UV57]](s16), [[UV58]](s16), [[UV59]](s16), [[UV60]](s16), [[UV61]](s16), [[UV62]](s16), [[UV63]](s16), [[UV64]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<65 x s16>), [[DEF]](p1) :: (store (<65 x s16>) into `<65 x i16> addrspace(1)* undef`, align 256, addrspace 1) + ; CHECK-NEXT: G_STORE 
[[BUILD_VECTOR]](<65 x s16>), [[DEF]](p1) :: (store (<65 x s16>) into `ptr addrspace(1) undef`, align 256, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <65 x i16> %arg0, <65 x i16> addrspace(1)* undef + store <65 x i16> %arg0, ptr addrspace(1) undef ret void } @@ -1262,9 +1262,9 @@ define void @void_func_v2f32(<2 x float> %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s32>), [[DEF]](p1) :: (store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x float> %arg0, <2 x float> addrspace(1)* undef + store <2 x float> %arg0, ptr addrspace(1) undef ret void } @@ -1278,9 +1278,9 @@ define void @void_func_v3f32(<3 x float> %arg0) #0 { ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `<3 x float> addrspace(1)* undef`, align 16, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s32>), [[DEF]](p1) :: (store (<3 x s32>) into `ptr addrspace(1) undef`, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x float> %arg0, <3 x float> addrspace(1)* undef + store <3 x float> %arg0, ptr addrspace(1) undef ret void } @@ -1295,9 +1295,9 @@ define void @void_func_v4f32(<4 x float> %arg0) #0 { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <4 x float> %arg0, <4 x float> addrspace(1)* undef + store <4 x float> %arg0, ptr addrspace(1) undef ret void } @@ -1316,9 +1316,9 @@ define void @void_func_v8f32(<8 x float> %arg0) #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s32>), [[DEF]](p1) :: (store (<8 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <8 x float> %arg0, <8 x float> addrspace(1)* undef + store <8 x float> %arg0, ptr addrspace(1) undef ret void } @@ -1345,9 +1345,9 @@ define void @void_func_v16f32(<16 x float> %arg0) #0 { ; CHECK-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), 
[[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s32>), [[DEF]](p1) :: (store (<16 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <16 x float> %arg0, <16 x float> addrspace(1)* undef + store <16 x float> %arg0, ptr addrspace(1) undef ret void } @@ -1364,9 +1364,9 @@ define void @void_func_v2f64(<2 x double> %arg0) #0 { ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<2 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x double> %arg0, <2 x double> addrspace(1)* undef + store <2 x double> %arg0, ptr addrspace(1) undef ret void } @@ -1386,9 +1386,9 @@ define void @void_func_v3f64(<3 x double> %arg0) #0 { ; CHECK-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `<3 x double> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s64>), [[DEF]](p1) :: (store (<3 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x double> %arg0, <3 x double> addrspace(1)* undef + store <3 x double> %arg0, ptr addrspace(1) undef ret void } @@ -1411,9 +1411,9 @@ define void @void_func_v4f64(<4 x double> %arg0) #0 { ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `<4 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<4 x s64>), [[DEF]](p1) :: (store (<4 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <4 x double> %arg0, <4 x double> addrspace(1)* undef + store <4 x double> %arg0, ptr addrspace(1) undef ret void } @@ -1448,9 +1448,9 @@ define void @void_func_v8f64(<8 x double> %arg0) #0 { ; CHECK-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `<8 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<8 x s64>), [[DEF]](p1) :: (store (<8 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <8 x double> 
%arg0, <8 x double> addrspace(1)* undef + store <8 x double> %arg0, ptr addrspace(1) undef ret void } @@ -1510,9 +1510,9 @@ define void @void_func_v16f64(<16 x double> %arg0) #0 { ; CHECK-NEXT: [[MV15:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY30]](s32), [[LOAD]](s32) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64), [[MV12]](s64), [[MV13]](s64), [[MV14]](s64), [[MV15]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `<16 x double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<16 x s64>), [[DEF]](p1) :: (store (<16 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <16 x double> %arg0, <16 x double> addrspace(1)* undef + store <16 x double> %arg0, ptr addrspace(1) undef ret void } @@ -1523,9 +1523,9 @@ define void @void_func_v2f16(<2 x half> %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](<2 x s16>), [[DEF]](p1) :: (store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <2 x half> %arg0, <2 x half> addrspace(1)* undef + store <2 x half> %arg0, ptr addrspace(1) undef ret void } @@ -1540,9 +1540,9 @@ define void @void_func_v3f16(<3 x half> %arg0) #0 { ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s16>) ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UV]](s16), [[UV1]](s16), [[UV2]](s16) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `<3 x half> addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<3 x s16>), [[DEF]](p1) :: (store (<3 x s16>) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <3 x half> %arg0, <3 x half> addrspace(1)* undef + store <3 x half> %arg0, ptr addrspace(1) undef ret void } @@ -1555,9 +1555,9 @@ define void @void_func_v4f16(<4 x half> %arg0) #0 { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `<4 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<4 x s16>), [[DEF]](p1) :: (store (<4 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <4 x half> %arg0, <4 x half> addrspace(1)* undef + store <4 x half> %arg0, ptr addrspace(1) undef ret void } @@ -1572,9 +1572,9 @@ define void @void_func_v8f16(<8 x half> %arg0) #0 { ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE 
[[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<8 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <8 x half> %arg0, <8 x half> addrspace(1)* undef + store <8 x half> %arg0, ptr addrspace(1) undef ret void } @@ -1593,9 +1593,9 @@ define void @void_func_v16f16(<16 x half> %arg0) #0 { ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr7 ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[COPY6]](<2 x s16>), [[COPY7]](<2 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `<16 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<16 x s16>), [[DEF]](p1) :: (store (<16 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store <16 x half> %arg0, <16 x half> addrspace(1)* undef + store <16 x half> %arg0, ptr addrspace(1) undef ret void } @@ -1611,14 +1611,13 @@ define void @void_func_i32_i64_i32(i32 %arg0, i64 %arg1, i32 %arg2) #0 { ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY4]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile i32 %arg0, i32 addrspace(1)* undef - store volatile i64 %arg1, i64 addrspace(1)* undef - store volatile i32 %arg2, i32 addrspace(1)* undef + store volatile i32 %arg0, ptr addrspace(1) undef + store volatile i64 %arg1, ptr addrspace(1) undef + store volatile i32 %arg2, ptr addrspace(1) undef ret void } @@ -1629,9 +1628,9 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `{ i32 } addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store { i32 } %arg0, { i32 } addrspace(1)* undef + store { i32 } %arg0, ptr addrspace(1) undef ret void } @@ -1645,16 +1644,16 @@ define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 { ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC]](s16) ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `{ 
i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p1) :: (store (s32) into `ptr addrspace(1) undef` + 4, addrspace 1) ; CHECK-NEXT: SI_RETURN - store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef + store { i8, i32 } %arg0, ptr addrspace(1) undef ret void } -define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0) #0 { +define void @void_func_byval_struct_i8_i32(ptr addrspace(5) byval({ i8, i32 }) %arg0) #0 { ; CHECK-LABEL: name: void_func_byval_struct_i8_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 @@ -1664,17 +1663,17 @@ define void @void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 4, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD1]](p1) :: (store (s32) into `ptr addrspace(1) undef` + 4, addrspace 1) ; CHECK-NEXT: SI_RETURN - %arg0.load = load { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 - store { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef + %arg0.load = load { i8, i32 }, ptr addrspace(5) %arg0 + store { i8, i32 } %arg0.load, ptr addrspace(1) undef ret void } -define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg0, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %arg1, i32 %arg2) #0 { +define void @void_func_byval_struct_i8_i32_x2(ptr addrspace(5) byval({ i8, i32 }) %arg0, ptr addrspace(5) byval({ i8, i32 }) %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_byval_struct_i8_i32_x2 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 @@ -1693,24 +1692,24 @@ define void @void_func_byval_struct_i8_i32_x2({ i8, i32 } addrspace(5)* byval({ ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p5) :: (volatile dereferenceable load (s8) from %ir.arg1, align 4, addrspace 5) ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY1]], [[C]](s32) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (volatile dereferenceable load (s32) from %ir.arg1 + 4, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD 
[[DEF]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store (s8) into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD2]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, align 4, addrspace 1) ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C1]](s64) - ; CHECK-NEXT: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store (s32) into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[LOAD3]](s32), [[PTR_ADD3]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY2]](s32), [[DEF1]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN - %arg0.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg0 - %arg1.load = load volatile { i8, i32 }, { i8, i32 } addrspace(5)* %arg1 - store volatile { i8, i32 } %arg0.load, { i8, i32 } addrspace(1)* undef - store volatile { i8, i32 } %arg1.load, { i8, i32 } addrspace(1)* undef - store volatile i32 %arg2, i32 addrspace(3)* undef + %arg0.load = load volatile { i8, i32 }, ptr addrspace(5) %arg0 + %arg1.load = load volatile { i8, i32 }, ptr addrspace(5) %arg1 + store volatile { i8, i32 } %arg0.load, ptr addrspace(1) undef + store volatile { i8, i32 } %arg1.load, ptr addrspace(1) undef + store volatile i32 %arg2, ptr addrspace(3) undef ret void } -define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i64 addrspace(5)* byval(i64) %arg1) #0 { +define void @void_func_byval_i32_byval_i64(ptr addrspace(5) byval(i32) %arg0, ptr addrspace(5) byval(i64) %arg1) #0 { ; CHECK-LABEL: name: void_func_byval_i32_byval_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 @@ -1718,20 +1717,19 @@ define void @void_func_byval_i32_byval_i64(i32 addrspace(5)* byval(i32) %arg0, i ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s64) from %ir.arg1, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[COPY2]](p1) :: (store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s64), [[DEF]](p1) :: (store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - %arg0.load = load i32, i32 addrspace(5)* %arg0 - %arg1.load = load i64, i64 addrspace(5)* %arg1 - store i32 %arg0.load, i32 addrspace(1)* undef - store i64 %arg1.load, i64 addrspace(1)* undef + %arg0.load = load i32, ptr addrspace(5) %arg0 + 
%arg1.load = load i64, ptr addrspace(5) %arg1 + store i32 %arg0.load, ptr addrspace(1) undef + store i64 %arg1.load, ptr addrspace(1) undef ret void } -define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 { +define void @void_func_byval_i8_align32_i16_align64(ptr addrspace(5) byval(i8) %arg0, ptr addrspace(5) byval(i16) align 64 %arg1) #0 { ; CHECK-LABEL: name: void_func_byval_i8_align32_i16_align64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 @@ -1739,21 +1737,20 @@ define void @void_func_byval_i8_align32_i16_align64(i8 addrspace(5)* byval(i8) % ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s8) from %ir.arg0, addrspace 5) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[C]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s16), [[COPY2]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[C]](p1) :: (store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s16), [[C]](p1) :: (store (s16) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN - %arg0.load = load i8, i8 addrspace(5)* %arg0 - %arg1.load = load i16, i16 addrspace(5)* %arg1 - store i8 %arg0.load, i8 addrspace(1)* null - store i16 %arg1.load, i16 addrspace(1)* null + %arg0.load = load i8, ptr addrspace(5) %arg0 + %arg1.load = load i16, ptr addrspace(5) %arg1 + store i8 %arg0.load, ptr addrspace(1) null + store i16 %arg1.load, ptr addrspace(1) null ret void } ; Make sure the alignment is taken from the correct parameter. 
-define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byval([3 x i32]) align 128 %arg0, i16 addrspace(5)* byval(i16) align 64 %arg1) #0 { +define void @byval_a3i32_align128_byval_i16_align64(ptr addrspace(5) byval([3 x i32]) align 128 %arg0, ptr addrspace(5) byval(i16) align 64 %arg1) #0 { ; CHECK-LABEL: name: byval_a3i32_align128_byval_i16_align64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.1 @@ -1761,7 +1758,6 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byva ; CHECK-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX1]](p5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[C]](p1) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p5) :: (dereferenceable load (s32) from %ir.arg0, addrspace 5) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C1]](s32) @@ -1770,24 +1766,24 @@ define void @byval_a3i32_align128_byval_i16_align64([3 x i32] addrspace(5)* byva ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C2]](s32) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p5) :: (dereferenceable load (s32) from %ir.arg0 + 8, addrspace 5) ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[COPY1]](p5) :: (dereferenceable load (s16) from %ir.arg1, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD]](s32), [[C]](p1) :: (store (s32) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; CHECK-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 4 - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C4]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 4, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C4]](p1) :: (store (s32) into `ptr addrspace(1) null` + 4, addrspace 1) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[C6:%[0-9]+]]:_(p1) = G_CONSTANT i64 8 - ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[C6]](p1) :: (store (s32) into `[3 x i32] addrspace(1)* null` + 8, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY2]](p1) :: (store (s16) into `i16 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](s32), [[C6]](p1) :: (store (s32) into `ptr addrspace(1) null` + 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[C]](p1) :: (store (s16) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN - %arg0.load = load [3 x i32], [3 x i32] addrspace(5)* %arg0 - %arg1.load = load i16, i16 addrspace(5)* %arg1 - store [3 x i32] %arg0.load, [3 x i32] addrspace(1)* null - store i16 %arg1.load, i16 addrspace(1)* null + %arg0.load = load [3 x i32], ptr addrspace(5) %arg0 + %arg1.load = load i16, ptr addrspace(5) %arg1 + store [3 x i32] %arg0.load, ptr addrspace(1) null + store i16 %arg1.load, ptr addrspace(1) null ret void } ; byval argument after non-byval stack passed argument -define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrspace(5)* byval(i8) align 8 %arg2) #0 { +define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, ptr addrspace(5) byval(i8) align 8 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_i32_byval_i8 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, 
$vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 @@ -1831,19 +1827,18 @@ define void @void_func_v32i32_i32_byval_i8(<32 x i32> %arg0, i32 %arg1, i8 addrs ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p5) = COPY [[FRAME_INDEX2]](p5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg2, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY32]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[C]](p1) :: (store (s8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i32 %arg1, i32 addrspace(1)* null - %arg2.load = load i8, i8 addrspace(5)* %arg2 - store i8 %arg2.load, i8 addrspace(1)* null + store i32 %arg1, ptr addrspace(1) null + %arg2.load = load i8, ptr addrspace(5) %arg2 + store i8 %arg2.load, ptr addrspace(1) null ret void } ; byval argument before non-byval stack passed argument -define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* byval(i8) %arg1, i32 %arg2) #0 { +define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, ptr addrspace(5) byval(i8) %arg1, i32 %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_byval_i8_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 @@ -1887,14 +1882,13 @@ define void @void_func_v32i32_byval_i8_i32(<32 x i32> %arg0, i8 addrspace(5)* by ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (s32) from %fixed-stack.0, align 8, addrspace 5) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `i32 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[C]](p1) :: (store (s32) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[COPY31]](p5) :: (dereferenceable load (s8) from %ir.arg1, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[COPY32]](p1) :: (store (s8) into `i8 addrspace(1)* null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](s8), [[C]](p1) :: (store (s8) into `ptr addrspace(1) null`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store i32 %arg2, i32 addrspace(1)* null - %arg1.load = load i8, i8 addrspace(5)* %arg1 - store i8 %arg1.load, i8 addrspace(1)* null + store i32 %arg2, ptr addrspace(1) null + %arg1.load = load i8, ptr addrspace(5) %arg1 + store i8 %arg1.load, ptr addrspace(1) null ret void } @@ -1945,15 +1939,13 @@ define void @void_func_v32i32_i32_i64(<32 x i32> %arg0, i32 %arg1, i64 %arg2) #0 ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p5) :: 
(invariant load (s32) from %fixed-stack.0, addrspace 5) ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[COPY31]](p1) :: (volatile store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](s64), [[COPY32]](p1) :: (volatile store (s64) into `i64 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](s64), [[DEF]](p1) :: (volatile store (s64) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile i32 %arg1, i32 addrspace(1)* undef - store volatile i64 %arg2, i64 addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile i32 %arg1, ptr addrspace(1) undef + store volatile i64 %arg2, ptr addrspace(1) undef ret void } @@ -2008,25 +2000,21 @@ define void @void_func_v32i32_i1_i8_i16(<32 x i32> %arg0, i1 %arg1, i8 %arg2, i1 ; CHECK-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s16) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s16) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY33:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY34:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[COPY31]](p1) :: (volatile store (s1) into `i1 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY32]](p1) :: (volatile store (s8) into `i8 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[COPY33]](p1) :: (volatile store (s16) into `i16 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD4]](s16), [[COPY34]](p1) :: (volatile store (s16) into `half addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s1), [[DEF]](p1) :: (volatile store (s1) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD4]](s16), [[DEF]](p1) :: (volatile store (s16) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile i1 %arg1, i1 addrspace(1)* undef - store volatile i8 %arg2, i8 addrspace(1)* undef - store volatile i16 %arg3, 
i16 addrspace(1)* undef - store volatile half %arg4, half addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile i1 %arg1, ptr addrspace(1) undef + store volatile i8 %arg2, ptr addrspace(1) undef + store volatile i16 %arg3, ptr addrspace(1) undef + store volatile half %arg4, ptr addrspace(1) undef ret void } -define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1, i8 addrspace(5)* %arg2) #0 { +define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, ptr addrspace(3) %arg1, ptr addrspace(5) %arg2) #0 { ; CHECK-LABEL: name: void_func_v32i32_p3_p5_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30 @@ -2070,15 +2058,13 @@ define void @void_func_v32i32_p3_p5_i16(<32 x i32> %arg0, i8 addrspace(3)* %arg1 ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(p5) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (p5) from %fixed-stack.0, align 8, addrspace 5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[COPY31]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD2]](p5), [[COPY32]](p1) :: (volatile store (p5) into `i8 addrspace(5)* addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](p3), [[DEF]](p1) :: (volatile store (p3) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](p5), [[DEF]](p1) :: (volatile store (p5) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile i8 addrspace(3)* %arg1, i8 addrspace(3)* addrspace(1)* undef - store volatile i8 addrspace(5)* %arg2, i8 addrspace(5)* addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile ptr addrspace(3) %arg1, ptr addrspace(1) undef + store volatile ptr addrspace(5) %arg2, ptr addrspace(1) undef ret void } @@ -2132,15 +2118,13 @@ define void @void_func_v32i32_v2i32_v2f32(<32 x i32> %arg0, <2 x i32> %arg1, <2 ; CHECK-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[LOAD3]](s32), [[LOAD4]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[COPY31]](p1) :: (volatile store (<2 x s32>) into `<2 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s32>), 
[[COPY32]](p1) :: (volatile store (<2 x s32>) into `<2 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s32>), [[DEF]](p1) :: (volatile store (<2 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <2 x i32> %arg1, <2 x i32> addrspace(1)* undef - store volatile <2 x float> %arg2, <2 x float> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <2 x i32> %arg1, ptr addrspace(1) undef + store volatile <2 x float> %arg2, ptr addrspace(1) undef ret void } @@ -2188,15 +2172,13 @@ define void @void_func_v32i32_v2i16_v2f16(<32 x i32> %arg0, <2 x i16> %arg1, <2 ; CHECK-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[FRAME_INDEX2]](p5) :: (invariant load (<2 x s16>) from %fixed-stack.0, align 8, addrspace 5) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[COPY31]](p1) :: (volatile store (<2 x s16>) into `<2 x i16> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[LOAD2]](<2 x s16>), [[COPY32]](p1) :: (volatile store (<2 x s16>) into `<2 x half> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[LOAD2]](<2 x s16>), [[DEF]](p1) :: (volatile store (<2 x s16>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <2 x i16> %arg1, <2 x i16> addrspace(1)* undef - store volatile <2 x half> %arg2, <2 x half> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <2 x i16> %arg1, ptr addrspace(1) undef + store volatile <2 x half> %arg2, ptr addrspace(1) undef ret void } @@ -2262,15 +2244,13 @@ define void @void_func_v32i32_v2i64_v2f64(<32 x i32> %arg0, <2 x i64> %arg1, <2 ; CHECK-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD7]](s32), [[LOAD8]](s32) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[MV2]](s64), [[MV3]](s64) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[COPY31]](p1) :: (volatile store (<2 x s64>) into `<2 x i64> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[COPY32]](p1) :: (volatile store (<2 x s64>) into `<2 x 
double> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<2 x s64>), [[DEF]](p1) :: (volatile store (<2 x s64>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <2 x i64> %arg1, <2 x i64> addrspace(1)* undef - store volatile <2 x double> %arg2, <2 x double> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <2 x i64> %arg1, ptr addrspace(1) undef + store volatile <2 x double> %arg2, ptr addrspace(1) undef ret void } @@ -2332,15 +2312,13 @@ define void @void_func_v32i32_v4i32_v4f32(<32 x i32> %arg0, <4 x i32> %arg1, <4 ; CHECK-NEXT: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX8]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[COPY31]](p1) :: (volatile store (<4 x s32>) into `<4 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[COPY32]](p1) :: (volatile store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<4 x s32>), [[DEF]](p1) :: (volatile store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <4 x i32> %arg1, <4 x i32> addrspace(1)* undef - store volatile <4 x float> %arg2, <4 x float> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <4 x i32> %arg1, ptr addrspace(1) undef + store volatile <4 x float> %arg2, ptr addrspace(1) undef ret void } @@ -2418,15 +2396,13 @@ define void @void_func_v32i32_v8i32_v8f32(<32 x i32> %arg0, <8 x i32> %arg1, <8 ; CHECK-NEXT: [[LOAD16:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX16]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32), [[LOAD16]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[COPY31]](p1) :: 
(volatile store (<8 x s32>) into `<8 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[COPY32]](p1) :: (volatile store (<8 x s32>) into `<8 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<8 x s32>), [[DEF]](p1) :: (volatile store (<8 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <8 x i32> %arg1, <8 x i32> addrspace(1)* undef - store volatile <8 x float> %arg2, <8 x float> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <8 x i32> %arg1, ptr addrspace(1) undef + store volatile <8 x float> %arg2, ptr addrspace(1) undef ret void } @@ -2536,15 +2512,13 @@ define void @void_func_v32i32_v16i32_v16f32(<32 x i32> %arg0, <16 x i32> %arg1, ; CHECK-NEXT: [[LOAD32:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX32]](p5) :: (invariant load (s32) from %fixed-stack.0, align 16, addrspace 5) ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD17]](s32), [[LOAD18]](s32), [[LOAD19]](s32), [[LOAD20]](s32), [[LOAD21]](s32), [[LOAD22]](s32), [[LOAD23]](s32), [[LOAD24]](s32), [[LOAD25]](s32), [[LOAD26]](s32), [[LOAD27]](s32), [[LOAD28]](s32), [[LOAD29]](s32), [[LOAD30]](s32), [[LOAD31]](s32), [[LOAD32]](s32) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: [[COPY32:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[COPY31]](p1) :: (volatile store (<16 x s32>) into `<16 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[COPY32]](p1) :: (volatile store (<16 x s32>) into `<16 x float> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR2]](<16 x s32>), [[DEF]](p1) :: (volatile store (<16 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <16 x i32> %arg1, <16 x i32> addrspace(1)* undef - store volatile <16 x float> %arg2, <16 x float> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <16 x i32> %arg1, ptr addrspace(1) undef + store volatile <16 x float> %arg2, ptr addrspace(1) undef ret void } @@ -2563,22 +2537,21 @@ define void @void_func_v3f32_wasted_reg(<3 x float> %arg0, i32 %arg1) #0 { ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p3) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(p3) = COPY [[DEF]](p3) ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) ; 
CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) - ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `float addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[COPY4]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN %arg0.0 = extractelement <3 x float> %arg0, i32 0 %arg0.1 = extractelement <3 x float> %arg0, i32 1 %arg0.2 = extractelement <3 x float> %arg0, i32 2 - store volatile float %arg0.0, float addrspace(3)* undef - store volatile float %arg0.1, float addrspace(3)* undef - store volatile float %arg0.2, float addrspace(3)* undef - store volatile i32 %arg1, i32 addrspace(3)* undef + store volatile float %arg0.0, ptr addrspace(3) undef + store volatile float %arg0.1, ptr addrspace(3) undef + store volatile float %arg0.2, ptr addrspace(3) undef + store volatile i32 %arg1, ptr addrspace(3) undef ret void } @@ -2599,18 +2572,18 @@ define void @void_func_v3i32_wasted_reg(<3 x i32> %arg0, i32 %arg1) #0 { ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C]](s32) ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C1]](s32) ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<3 x s32>), [[C2]](s32) - ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) - ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `i32 addrspace(3)* undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC1]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[EVEC2]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) + ; CHECK-NEXT: G_STORE [[COPY3]](s32), [[DEF]](p3) :: (volatile store (s32) into `ptr addrspace(3) undef`, addrspace 3) ; CHECK-NEXT: SI_RETURN %arg0.0 = extractelement <3 x i32> %arg0, i32 0 %arg0.1 = extractelement <3 x i32> %arg0, i32 1 %arg0.2 = extractelement <3 x i32> %arg0, i32 2 - store volatile i32 %arg0.0, i32 addrspace(3)* undef - store volatile i32 %arg0.1, i32 addrspace(3)* undef - store volatile i32 %arg0.2, i32 addrspace(3)* undef - store volatile i32 %arg1, i32 
addrspace(3)* undef + store volatile i32 %arg0.0, ptr addrspace(3) undef + store volatile i32 %arg0.1, ptr addrspace(3) undef + store volatile i32 %arg0.2, ptr addrspace(3) undef + store volatile i32 %arg1, ptr addrspace(3) undef ret void } @@ -2655,9 +2628,9 @@ define void @void_func_v16i8(<16 x i8> %arg0) #0 { ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[TRUNC3]](s16), [[TRUNC4]](s16), [[TRUNC5]](s16), [[TRUNC6]](s16), [[TRUNC7]](s16), [[TRUNC8]](s16), [[TRUNC9]](s16), [[TRUNC10]](s16), [[TRUNC11]](s16), [[TRUNC12]](s16), [[TRUNC13]](s16), [[TRUNC14]](s16), [[TRUNC15]](s16) ; CHECK-NEXT: [[TRUNC16:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR]](<16 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC16]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <16 x i8> %arg0, <16 x i8> addrspace(1)* undef + store volatile <16 x i8> %arg0, ptr addrspace(1) undef ret void } @@ -2736,16 +2709,15 @@ define void @void_func_v32i32_v16i8(<32 x i32> %arg0, <16 x i8> %arg1) #0 { ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s16>) = G_BUILD_VECTOR [[LOAD1]](s16), [[LOAD2]](s16), [[LOAD3]](s16), [[LOAD4]](s16), [[LOAD5]](s16), [[LOAD6]](s16), [[LOAD7]](s16), [[LOAD8]](s16), [[LOAD9]](s16), [[LOAD10]](s16), [[LOAD11]](s16), [[LOAD12]](s16), [[LOAD13]](s16), [[LOAD14]](s16), [[LOAD15]](s16), [[LOAD16]](s16) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s8>) = G_TRUNC [[BUILD_VECTOR1]](<16 x s16>) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY31:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `<32 x i32> addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC]](<16 x s8>), [[COPY31]](p1) :: (volatile store (<16 x s8>) into `<16 x i8> addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<32 x s32>), [[DEF]](p1) :: (volatile store (<32 x s32>) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](<16 x s8>), [[DEF]](p1) :: (volatile store (<16 x s8>) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN - store volatile <32 x i32> %arg0, <32 x i32> addrspace(1)* undef - store volatile <16 x i8> %arg1, <16 x i8> addrspace(1)* undef + store volatile <32 x i32> %arg0, ptr addrspace(1) undef + store volatile <16 x i8> %arg1, ptr addrspace(1) undef ret void } -define void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %arg0, i8 %pad, {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1) { +define void @pointer_in_struct_argument({ptr addrspace(3), ptr addrspace(1)} %arg0, i8 %pad, {ptr addrspace(3), ptr addrspace(1234)} %arg1) { ; CHECK-LABEL: name: pointer_in_struct_argument ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 @@ -2762,28 +2734,25 @@ define void @pointer_in_struct_argument({i8 addrspace(3)*, i8 addrspace(1)*} %ar ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 ; CHECK-NEXT: [[MV1:%[0-9]+]]:_(p1234) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32) ; CHECK-NEXT: [[C:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(p1) = COPY 
[[C]](p1) - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p1) = COPY [[C]](p1) - ; CHECK-NEXT: G_STORE [[COPY]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV]](p1), [[COPY7]](p1) :: (volatile store (p1) into `i8 addrspace(1)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[COPY8]](p1) :: (volatile store (s8) into `i8 addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `i8 addrspace(3)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: G_STORE [[MV1]](p1234), [[COPY9]](p1) :: (volatile store (p1234) into `i8 addrspace(1234)* addrspace(1)* null`, addrspace 1) - ; CHECK-NEXT: SI_RETURN - %val0 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 0 - %val1 = extractvalue {i8 addrspace(3)*, i8 addrspace(1)*} %arg0, 1 - %val2 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 0 - %val3 = extractvalue {i8 addrspace(3)*, i8 addrspace(1234)*} %arg1, 1 - store volatile i8 addrspace(3)* %val0, i8 addrspace(3)* addrspace(1)* null - store volatile i8 addrspace(1)* %val1, i8 addrspace(1)* addrspace(1)* null - store volatile i8 %pad, i8 addrspace(1)* null - store volatile i8 addrspace(3)* %val2, i8 addrspace(3)* addrspace(1)* null - store volatile i8 addrspace(1234)* %val3, i8 addrspace(1234)* addrspace(1)* null - ret void -} - -define void @vector_ptr_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg) { + ; CHECK-NEXT: G_STORE [[COPY]](p3), [[C]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV]](p1), [[C]](p1) :: (volatile store (p1) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC1]](s8), [[C]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY4]](p3), [[C]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: G_STORE [[MV1]](p1234), [[C]](p1) :: (volatile store (p1234) into `ptr addrspace(1) null`, addrspace 1) + ; CHECK-NEXT: SI_RETURN + %val0 = extractvalue {ptr addrspace(3), ptr addrspace(1)} %arg0, 0 + %val1 = extractvalue {ptr addrspace(3), ptr addrspace(1)} %arg0, 1 + %val2 = extractvalue {ptr addrspace(3), ptr addrspace(1234)} %arg1, 0 + %val3 = extractvalue {ptr addrspace(3), ptr addrspace(1234)} %arg1, 1 + store volatile ptr addrspace(3) %val0, ptr addrspace(1) null + store volatile ptr addrspace(1) %val1, ptr addrspace(1) null + store volatile i8 %pad, ptr addrspace(1) null + store volatile ptr addrspace(3) %val2, ptr addrspace(1) null + store volatile ptr addrspace(1234) %val3, ptr addrspace(1) null + ret void +} + +define void @vector_ptr_in_struct_arg({ <2 x ptr addrspace(1)>, <2 x ptr addrspace(3)> } %arg) { ; CHECK-LABEL: name: vector_ptr_in_struct_arg ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 @@ -2799,12 +2768,12 @@ define void @vector_ptr_in_struct_arg({ <2 x i8 addrspace(1)*>, <2 x i8 addrspac ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(p3) = COPY $vgpr5 ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x p3>) = G_BUILD_VECTOR [[COPY4]](p3), [[COPY5]](p3) ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, 
addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64) - ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x p3>), [[PTR_ADD]](p1) :: (store (<2 x p3>) into `{ <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef` + 16, align 16, addrspace 1) + ; CHECK-NEXT: G_STORE [[BUILD_VECTOR1]](<2 x p3>), [[PTR_ADD]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef` + 16, align 16, addrspace 1) ; CHECK-NEXT: SI_RETURN - store { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } %arg, { <2 x i8 addrspace(1)*>, <2 x i8 addrspace(3)*> } addrspace(1)* undef + store { <2 x ptr addrspace(1)>, <2 x ptr addrspace(3)> } %arg, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.v2i65.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.v2i65.ll index 4611f170bf2fd..22035a5ac1756 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.v2i65.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.v2i65.ll @@ -2,6 +2,6 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -stop-after=irtranslator -verify-machineinstrs -o - %s define void @void_func_v2i65(<2 x i65> %arg0) #0 { - store <2 x i65> %arg0, <2 x i65> addrspace(1)* undef + store <2 x i65> %arg0, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll index 2e660884e7db1..2321cca252b83 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-getelementptr.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -stop-after=irtranslator -o - %s | FileCheck %s ; Test 64-bit pointer with 64-bit index -define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64(<2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx) { +define <2 x ptr addrspace(1)> @vector_gep_v2p1_index_v2i64(<2 x ptr addrspace(1)> %ptr, <2 x i64> %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 @@ -32,12 +32,12 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64(<2 x i32 addrspace(1 ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx - ret <2 x i32 addrspace(1)*> %gep + %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, <2 x i64> %idx + ret <2 x ptr addrspace(1)> %gep } ; Test 32-bit pointer with 32-bit index -define <2 x i32 addrspace(3)*> @vector_gep_v2p3_index_v2i32(<2 x i32 addrspace(3)*> %ptr, <2 x i32> %idx) { +define <2 x ptr addrspace(3)> @vector_gep_v2p3_index_v2i32(<2 x ptr addrspace(3)> %ptr, <2 x i32> %idx) { ; CHECK-LABEL: name: vector_gep_v2p3_index_v2i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -57,12 +57,12 @@ define <2 x i32 addrspace(3)*> @vector_gep_v2p3_index_v2i32(<2 x i32 addrspace(3 ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %gep = getelementptr i32, <2 x i32 addrspace(3)*> %ptr, <2 x i32> %idx - ret <2 x i32 addrspace(3)*> %gep + %gep = getelementptr i32, <2 x ptr addrspace(3)> %ptr, <2 x i32> %idx + 
ret <2 x ptr addrspace(3)> %gep } ; Test 64-bit pointer with 32-bit index -define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i32(<2 x i32 addrspace(1)*> %ptr, <2 x i32> %idx) { +define <2 x ptr addrspace(1)> @vector_gep_v2p1_index_v2i32(<2 x ptr addrspace(1)> %ptr, <2 x i32> %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 @@ -89,12 +89,12 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i32(<2 x i32 addrspace(1 ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i32> %idx - ret <2 x i32 addrspace(1)*> %gep + %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, <2 x i32> %idx + ret <2 x ptr addrspace(1)> %gep } ; Test 64-bit pointer with 64-bit scalar index -define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i64(<2 x i32 addrspace(1)*> %ptr, i64 %idx) { +define <2 x ptr addrspace(1)> @vector_gep_v2p1_index_i64(<2 x ptr addrspace(1)> %ptr, i64 %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 @@ -122,12 +122,12 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i64(<2 x i32 addrspace(1)* ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, i64 %idx - ret <2 x i32 addrspace(1)*> %gep + %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, i64 %idx + ret <2 x ptr addrspace(1)> %gep } ; Test 64-bit pointer with 32-bit scalar index -define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i32(<2 x i32 addrspace(1)*> %ptr, i32 %idx) { +define <2 x ptr addrspace(1)> @vector_gep_v2p1_index_i32(<2 x ptr addrspace(1)> %ptr, i32 %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 @@ -153,12 +153,12 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_i32(<2 x i32 addrspace(1)* ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, i32 %idx - ret <2 x i32 addrspace(1)*> %gep + %gep = getelementptr i32, <2 x ptr addrspace(1)> %ptr, i32 %idx + ret <2 x ptr addrspace(1)> %gep } ; Test 64-bit pointer with 64-bit constant, non-splat -define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 addrspace(1)*> %ptr, <2 x i64> %idx) { +define <2 x ptr addrspace(1)> @vector_gep_v2p1_index_v2i64_constant(<2 x ptr addrspace(1)> %ptr, <2 x i64> %idx) { ; CHECK-LABEL: name: vector_gep_v2p1_index_v2i64_constant ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 @@ -192,6 +192,6 @@ define <2 x i32 addrspace(1)*> @vector_gep_v2p1_index_v2i64_constant(<2 x i32 ad ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %gep = getelementptr i32, <2 x i32 addrspace(1)*> %ptr, <2 x i64> - ret <2 x i32 addrspace(1)*> %gep + %gep = getelementptr i32, <2 x ptr addrspace(1)> 
%ptr, <2 x i64> + ret <2 x ptr addrspace(1)> %gep } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll index 3e8197462e313..72cf3a08e7ffb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-indirect-call.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -stop-after=irtranslator -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -enable-var-scope %s -define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) { +define amdgpu_kernel void @test_indirect_call_sgpr_ptr(ptr %fptr) { ; CHECK-LABEL: name: test_indirect_call_sgpr_ptr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $vgpr1, $vgpr2, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -17,7 +17,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) { ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset.cast, align 16, addrspace 4) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p0) from %ir.fptr.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]] ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY7]] @@ -56,7 +56,7 @@ define amdgpu_kernel void @test_indirect_call_sgpr_ptr(void()* %fptr) { ret void } -define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(void()* %fptr) { +define amdgpu_gfx void @test_gfx_indirect_call_sgpr_ptr(ptr %fptr) { ; CHECK-LABEL: name: test_gfx_indirect_call_sgpr_ptr ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll index 89235e3f05de2..ac12b4f19bcc0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-inline-asm.ll @@ -197,7 +197,7 @@ entry: ret float %0 } -define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind { +define i32 @test_memory_constraint(ptr addrspace(3) %a) nounwind { ; CHECK-LABEL: name: test_memory_constraint ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0 @@ -207,7 +207,7 @@ define i32 @test_memory_constraint(i32 addrspace(3)* %a) nounwind { ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK-NEXT: $vgpr0 = COPY [[COPY1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(i32 addrspace(3)* elementtype(i32) %a) + %1 = tail call i32 asm "ds_read_b32 $0, $1", "=v,*m"(ptr addrspace(3) elementtype(i32) %a) ret i32 %1 } @@ -265,17 +265,17 @@ define void @test_many_matching_constraints(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY %3 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY %4 ; CHECK-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY %5 - ; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY7]](s32), 
[[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY6]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY7]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[COPY8]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: SI_RETURN %asm = call {i32, i32, i32} asm sideeffect "; ", "=v,=v,=v,0,2,1"(i32 %c, i32 %a, i32 %b) %asmresult0 = extractvalue {i32, i32, i32} %asm, 0 - store i32 %asmresult0, i32 addrspace(1)* undef + store i32 %asmresult0, ptr addrspace(1) undef %asmresult1 = extractvalue {i32, i32, i32} %asm, 1 - store i32 %asmresult1, i32 addrspace(1)* undef + store i32 %asmresult1, ptr addrspace(1) undef %asmresult2 = extractvalue {i32, i32, i32} %asm, 2 - store i32 %asmresult2, i32 addrspace(1)* undef + store i32 %asmresult2, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll index 45db580e15458..7c7a600c4bda2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll @@ -54,7 +54,7 @@ define { i32, i64 } @load_const_struct_gv() { ret { i32, i64 } %load } -define void @test_memcpy_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src) { +define void @test_memcpy_p1_constaddr_i64(ptr addrspace(1) %dst, ptr addrspace(4) %src) { ; CHECK-LABEL: name: test_memcpy_p1_constaddr_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -68,11 +68,11 @@ define void @test_memcpy_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrspace(4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[MV1]](p4), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.p1.p4.i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src, i64 32, i1 false) + call void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) %dst, ptr addrspace(4) %src, i64 32, i1 false) ret void } -define void @test_memcpy_inline_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src) { +define void @test_memcpy_inline_p1_constaddr_i64(ptr addrspace(1) %dst, ptr addrspace(4) %src) { ; CHECK-LABEL: name: test_memcpy_inline_p1_constaddr_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -86,11 +86,11 @@ define void @test_memcpy_inline_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrs ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: G_MEMCPY_INLINE [[MV]](p1), [[MV1]](p4), [[C]](s64) :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.inline.p1.p4.i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src, i64 32, i1 false) + call void @llvm.memcpy.inline.p1.p4.i64(ptr addrspace(1) %dst, ptr addrspace(4) %src, i64 32, i1 false) ret void } -define void @test_memmove_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src) { +define void @test_memmove_p1_constaddr_i64(ptr addrspace(1) %dst, ptr addrspace(4) %src) { ; CHECK-LABEL: name: test_memmove_p1_constaddr_i64 ; CHECK: bb.1 (%ir-block.0): ; 
CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -104,7 +104,7 @@ define void @test_memmove_p1_constaddr_i64(i8 addrspace(1)* %dst, i8 addrspace(4 ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[MV1]](p4), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1), (dereferenceable invariant load (s8) from %ir.src, addrspace 4) ; CHECK-NEXT: SI_RETURN - call void @llvm.memmove.p1.p4.i64(i8 addrspace(1)* %dst, i8 addrspace(4)* %src, i64 32, i1 false) + call void @llvm.memmove.p1.p4.i64(ptr addrspace(1) %dst, ptr addrspace(4) %src, i64 32, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll index 8e90bd593a108..7fae9f8bb8cac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll @@ -3,7 +3,7 @@ ; Size operand should be the minimum of the two pointer sizes. -define void @test_memcpy_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { +define void @test_memcpy_p1_p3_i64(ptr addrspace(1) %dst, ptr addrspace(3) %src) { ; CHECK-LABEL: name: test_memcpy_p1_p3_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -16,11 +16,11 @@ define void @test_memcpy_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false) + call void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) %dst, ptr addrspace(3) %src, i64 256, i1 false) ret void } -define void @test_memcpy_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { +define void @test_memcpy_p1_p3_i32(ptr addrspace(1) %dst, ptr addrspace(3) %src) { ; CHECK-LABEL: name: test_memcpy_p1_p3_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -32,11 +32,11 @@ define void @test_memcpy_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) + call void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 256, i1 false) ret void } -define void @test_memcpy_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { +define void @test_memcpy_p1_p3_i16(ptr addrspace(1) %dst, ptr addrspace(3) %src) { ; CHECK-LABEL: name: test_memcpy_p1_p3_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -49,11 +49,11 @@ define void @test_memcpy_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMCPY [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false) + call void @llvm.memcpy.p1.p3.i16(ptr addrspace(1) %dst, ptr addrspace(3) %src, i16 256, i1 false) ret void } -define void 
@test_memcpy_p3_p1_i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) { +define void @test_memcpy_p3_p1_i64(ptr addrspace(3) %dst, ptr addrspace(1) %src) { ; CHECK-LABEL: name: test_memcpy_p3_p1_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -66,11 +66,11 @@ define void @test_memcpy_p3_p1_i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i64 256, i1 false) + call void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) %dst, ptr addrspace(1) %src, i64 256, i1 false) ret void } -define void @test_memcpy_p3_p1_i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) { +define void @test_memcpy_p3_p1_i32(ptr addrspace(3) %dst, ptr addrspace(1) %src) { ; CHECK-LABEL: name: test_memcpy_p3_p1_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -82,11 +82,11 @@ define void @test_memcpy_p3_p1_i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i32 256, i1 false) + call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) %dst, ptr addrspace(1) %src, i32 256, i1 false) ret void } -define void @test_memcpy_p3_p1_i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) { +define void @test_memcpy_p3_p1_i16(ptr addrspace(3) %dst, ptr addrspace(1) %src) { ; CHECK-LABEL: name: test_memcpy_p3_p1_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -99,11 +99,11 @@ define void @test_memcpy_p3_p1_i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src) ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMCPY [[COPY]](p3), [[MV]](p1), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3), (load (s8) from %ir.src, addrspace 1) ; CHECK-NEXT: SI_RETURN - call void @llvm.memcpy.p3i8.p1i8.i16(i8 addrspace(3)* %dst, i8 addrspace(1)* %src, i16 256, i1 false) + call void @llvm.memcpy.p3.p1.i16(ptr addrspace(3) %dst, ptr addrspace(1) %src, i16 256, i1 false) ret void } -define void @test_memmove_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { +define void @test_memmove_p1_p3_i64(ptr addrspace(1) %dst, ptr addrspace(3) %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -116,11 +116,11 @@ define void @test_memmove_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[TRUNC]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memmove.p1i8.p3i8.i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i64 256, i1 false) + call void @llvm.memmove.p1.p3.i64(ptr addrspace(1) %dst, ptr addrspace(3) %src, i64 256, i1 false) ret void } -define void @test_memmove_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { +define void @test_memmove_p1_p3_i32(ptr addrspace(1) %dst, ptr addrspace(3) %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i32 ; CHECK: bb.1 
(%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -132,11 +132,11 @@ define void @test_memmove_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) + call void @llvm.memmove.p1.p3.i32(ptr addrspace(1) %dst, ptr addrspace(3) %src, i32 256, i1 false) ret void } -define void @test_memmove_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { +define void @test_memmove_p1_p3_i16(ptr addrspace(1) %dst, ptr addrspace(3) %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -149,11 +149,11 @@ define void @test_memmove_p1_p3_i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMMOVE [[MV]](p1), [[COPY2]](p3), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 1), (load (s8) from %ir.src, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memmove.p1i8.p3i8.i16(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i16 256, i1 false) + call void @llvm.memmove.p1.p3.i16(ptr addrspace(1) %dst, ptr addrspace(3) %src, i16 256, i1 false) ret void } -define void @test_memset_p1_i64(i8 addrspace(1)* %dst, i8 %val) { +define void @test_memset_p1_i64(ptr addrspace(1) %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p1_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -166,11 +166,11 @@ define void @test_memset_p1_i64(i8 addrspace(1)* %dst, i8 %val) { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[C]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) ; CHECK-NEXT: SI_RETURN - call void @llvm.memset.p1i8.i64(i8 addrspace(1)* %dst, i8 %val, i64 256, i1 false) + call void @llvm.memset.p1.i64(ptr addrspace(1) %dst, i8 %val, i64 256, i1 false) ret void } -define void @test_memset_p1_i32(i8 addrspace(1)* %dst, i8 %val) { +define void @test_memset_p1_i32(ptr addrspace(1) %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p1_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -184,11 +184,11 @@ define void @test_memset_p1_i32(i8 addrspace(1)* %dst, i8 %val) { ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) ; CHECK-NEXT: SI_RETURN - call void @llvm.memset.p1i8.i32(i8 addrspace(1)* %dst, i8 %val, i32 256, i1 false) + call void @llvm.memset.p1.i32(ptr addrspace(1) %dst, i8 %val, i32 256, i1 false) ret void } -define void @test_memset_p1_i16(i8 addrspace(1)* %dst, i8 %val) { +define void @test_memset_p1_i16(ptr addrspace(1) %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p1_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -202,11 +202,11 @@ define void @test_memset_p1_i16(i8 addrspace(1)* %dst, i8 %val) { ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMSET [[MV]](p1), [[TRUNC]](s8), [[ZEXT]](s64), 0 :: (store (s8) into %ir.dst, addrspace 1) ; CHECK-NEXT: SI_RETURN - call void @llvm.memset.p1i8.i16(i8 addrspace(1)* %dst, i8 %val, i16 256, i1 false) + call void @llvm.memset.p1.i16(ptr addrspace(1) %dst, i8 %val, i16 256, i1 false) 
ret void } -define void @test_memset_p3_i64(i8 addrspace(3)* %dst, i8 %val) { +define void @test_memset_p3_i64(ptr addrspace(3) %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p3_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -218,11 +218,11 @@ define void @test_memset_p3_i64(i8 addrspace(3)* %dst, i8 %val) { ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64) ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[TRUNC1]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memset.p3i8.i64(i8 addrspace(3)* %dst, i8 %val, i64 256, i1 false) + call void @llvm.memset.p3.i64(ptr addrspace(3) %dst, i8 %val, i64 256, i1 false) ret void } -define void @test_memset_p3_i32(i8 addrspace(3)* %dst, i8 %val) { +define void @test_memset_p3_i32(ptr addrspace(3) %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p3_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -233,11 +233,11 @@ define void @test_memset_p3_i32(i8 addrspace(3)* %dst, i8 %val) { ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[C]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %dst, i8 %val, i32 256, i1 false) + call void @llvm.memset.p3.i32(ptr addrspace(3) %dst, i8 %val, i32 256, i1 false) ret void } -define void @test_memset_p3_i16(i8 addrspace(3)* %dst, i8 %val) { +define void @test_memset_p3_i16(ptr addrspace(3) %dst, i8 %val) { ; CHECK-LABEL: name: test_memset_p3_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -249,25 +249,25 @@ define void @test_memset_p3_i16(i8 addrspace(3)* %dst, i8 %val) { ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) ; CHECK-NEXT: G_MEMSET [[COPY]](p3), [[TRUNC]](s8), [[ZEXT]](s32), 0 :: (store (s8) into %ir.dst, addrspace 3) ; CHECK-NEXT: SI_RETURN - call void @llvm.memset.p3i8.i16(i8 addrspace(3)* %dst, i8 %val, i16 256, i1 false) + call void @llvm.memset.p3.i16(ptr addrspace(3) %dst, i8 %val, i16 256, i1 false) ret void } -declare void @llvm.memcpy.p1i8.p3i8.i64(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(3)* noalias nocapture readonly, i64, i1 immarg) #0 -declare void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(3)* noalias nocapture readonly, i32, i1 immarg) #0 -declare void @llvm.memcpy.p1i8.p3i8.i16(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(3)* noalias nocapture readonly, i16, i1 immarg) #0 -declare void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i64, i1 immarg) #0 -declare void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i32, i1 immarg) #0 -declare void @llvm.memcpy.p3i8.p1i8.i16(i8 addrspace(3)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i16, i1 immarg) #0 -declare void @llvm.memmove.p1i8.p3i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i64, i1 immarg) #0 -declare void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1 immarg) #0 -declare void @llvm.memmove.p1i8.p3i8.i16(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i16, i1 immarg) #0 -declare void @llvm.memset.p1i8.i64(i8 addrspace(1)* nocapture writeonly, i8, i64, i1 immarg) #1 -declare void @llvm.memset.p1i8.i32(i8 addrspace(1)* 
nocapture writeonly, i8, i32, i1 immarg) #1 -declare void @llvm.memset.p1i8.i16(i8 addrspace(1)* nocapture writeonly, i8, i16, i1 immarg) #1 -declare void @llvm.memset.p3i8.i64(i8 addrspace(3)* nocapture writeonly, i8, i64, i1 immarg) #1 -declare void @llvm.memset.p3i8.i32(i8 addrspace(3)* nocapture writeonly, i8, i32, i1 immarg) #1 -declare void @llvm.memset.p3i8.i16(i8 addrspace(3)* nocapture writeonly, i8, i16, i1 immarg) #1 +declare void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i32, i1 immarg) #0 +declare void @llvm.memcpy.p1.p3.i16(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i16, i1 immarg) #0 +declare void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i32, i1 immarg) #0 +declare void @llvm.memcpy.p3.p1.i16(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i16, i1 immarg) #0 +declare void @llvm.memmove.p1.p3.i64(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memmove.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1 immarg) #0 +declare void @llvm.memmove.p1.p3.i16(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i16, i1 immarg) #0 +declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture writeonly, i8, i64, i1 immarg) #1 +declare void @llvm.memset.p1.i32(ptr addrspace(1) nocapture writeonly, i8, i32, i1 immarg) #1 +declare void @llvm.memset.p1.i16(ptr addrspace(1) nocapture writeonly, i8, i16, i1 immarg) #1 +declare void @llvm.memset.p3.i64(ptr addrspace(3) nocapture writeonly, i8, i64, i1 immarg) #1 +declare void @llvm.memset.p3.i32(ptr addrspace(3) nocapture writeonly, i8, i32, i1 immarg) #1 +declare void @llvm.memset.p3.i16(ptr addrspace(3) nocapture writeonly, i8, i16, i1 immarg) #1 attributes #0 = { argmemonly nounwind willreturn } attributes #1 = { argmemonly nounwind willreturn writeonly } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll index dab8b64e75f45..c182cff9329f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - -stop-after=irtranslator %s | FileCheck %s ; Check that the CSEMIRBuilder doesn't fold away the getelementptr during IRTranslator -define i8 addrspace(7)* @no_auto_constfold_gep() { +define ptr addrspace(7) @no_auto_constfold_gep() { ; CHECK-LABEL: name: no_auto_constfold_gep ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(p7) = G_CONSTANT i64 0 @@ -12,12 +12,12 @@ define i8 addrspace(7)* @no_auto_constfold_gep() { ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %gep = getelementptr i8, i8 addrspace(7)* null, i64 123 - ret i8 addrspace(7)* %gep + %gep = getelementptr i8, ptr addrspace(7) null, i64 123 + ret 
ptr addrspace(7) %gep } ; Check that the CSEMIRBuilder doesn't fold away the getelementptr during IRTranslator -define <2 x i8 addrspace(7)*> @no_auto_constfold_gep_vector() { +define <2 x ptr addrspace(7)> @no_auto_constfold_gep_vector() { ; CHECK-LABEL: name: no_auto_constfold_gep_vector ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: [[C:%[0-9]+]]:_(p7) = G_CONSTANT i64 0 @@ -32,6 +32,6 @@ define <2 x i8 addrspace(7)*> @no_auto_constfold_gep_vector() { ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) ; CHECK-NEXT: $vgpr3 = COPY [[UV3]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - %gep = getelementptr i8, <2 x i8 addrspace(7)*> zeroinitializer, <2 x i64> - ret <2 x i8 addrspace(7)*> %gep + %gep = getelementptr i8, <2 x ptr addrspace(7)> zeroinitializer, <2 x i64> + ret <2 x ptr addrspace(7)> %gep } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll index af0daceac6c61..8eb0658f8023b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -stop-after=irtranslator < %s | FileCheck %s -define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) { +define ptr @ptrmask_flat_i64(ptr %ptr, i64 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -17,11 +17,11 @@ define i8* @ptrmask_flat_i64(i8* %ptr, i64 %mask) { ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %masked = call i8* @llvm.ptrmask.p0i8.i64(i8* %ptr, i64 %mask) - ret i8* %masked + %masked = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 %mask) + ret ptr %masked } -define i8* @ptrmask_flat_i32(i8* %ptr, i32 %mask) { +define ptr @ptrmask_flat_i32(ptr %ptr, i32 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -35,11 +35,11 @@ define i8* @ptrmask_flat_i32(i8* %ptr, i32 %mask) { ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %masked = call i8* @llvm.ptrmask.p0i8.i32(i8* %ptr, i32 %mask) - ret i8* %masked + %masked = call ptr @llvm.ptrmask.p0.i32(ptr %ptr, i32 %mask) + ret ptr %masked } -define i8* @ptrmask_flat_i16(i8* %ptr, i16 %mask) { +define ptr @ptrmask_flat_i16(ptr %ptr, i16 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -54,11 +54,11 @@ define i8* @ptrmask_flat_i16(i8* %ptr, i16 %mask) { ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 - %masked = call i8* @llvm.ptrmask.p0i8.i16(i8* %ptr, i16 %mask) - ret i8* %masked + %masked = call ptr @llvm.ptrmask.p0.i16(ptr %ptr, i16 %mask) + ret ptr %masked } -define i8* @ptrmask_flat_i1(i8* %ptr, i1 %mask) { +define ptr @ptrmask_flat_i1(ptr %ptr, i1 %mask) { ; CHECK-LABEL: name: ptrmask_flat_i1 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -73,11 +73,11 @@ define i8* @ptrmask_flat_i1(i8* %ptr, i1 %mask) { ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: SI_RETURN 
implicit $vgpr0, implicit $vgpr1 - %masked = call i8* @llvm.ptrmask.p0i8.i1(i8* %ptr, i1 %mask) - ret i8* %masked + %masked = call ptr @llvm.ptrmask.p0.i1(ptr %ptr, i1 %mask) + ret ptr %masked } -define i8 addrspace(3)* @ptrmask_local_i64(i8 addrspace(3)* %ptr, i64 %mask) { +define ptr addrspace(3) @ptrmask_local_i64(ptr addrspace(3) %ptr, i64 %mask) { ; CHECK-LABEL: name: ptrmask_local_i64 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2 @@ -89,11 +89,11 @@ define i8 addrspace(3)* @ptrmask_local_i64(i8 addrspace(3)* %ptr, i64 %mask) { ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)* %ptr, i64 %mask) - ret i8 addrspace(3)* %masked + %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) %ptr, i64 %mask) + ret ptr addrspace(3) %masked } -define i8 addrspace(3)* @ptrmask_local_i32(i8 addrspace(3)* %ptr, i32 %mask) { +define ptr addrspace(3) @ptrmask_local_i32(ptr addrspace(3) %ptr, i32 %mask) { ; CHECK-LABEL: name: ptrmask_local_i32 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -103,11 +103,11 @@ define i8 addrspace(3)* @ptrmask_local_i32(i8 addrspace(3)* %ptr, i32 %mask) { ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[COPY1]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)* %ptr, i32 %mask) - ret i8 addrspace(3)* %masked + %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) %ptr, i32 %mask) + ret ptr addrspace(3) %masked } -define i8 addrspace(3)* @ptrmask_local_i16(i8 addrspace(3)* %ptr, i16 %mask) { +define ptr addrspace(3) @ptrmask_local_i16(ptr addrspace(3) %ptr, i16 %mask) { ; CHECK-LABEL: name: ptrmask_local_i16 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -118,11 +118,11 @@ define i8 addrspace(3)* @ptrmask_local_i16(i8 addrspace(3)* %ptr, i16 %mask) { ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)* %ptr, i16 %mask) - ret i8 addrspace(3)* %masked + %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i16(ptr addrspace(3) %ptr, i16 %mask) + ret ptr addrspace(3) %masked } -define i8 addrspace(3)* @ptrmask_local_i1(i8 addrspace(3)* %ptr, i1 %mask) { +define ptr addrspace(3) @ptrmask_local_i1(ptr addrspace(3) %ptr, i1 %mask) { ; CHECK-LABEL: name: ptrmask_local_i1 ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $vgpr0, $vgpr1 @@ -133,21 +133,21 @@ define i8 addrspace(3)* @ptrmask_local_i1(i8 addrspace(3)* %ptr, i1 %mask) { ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s1) ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3) ; CHECK-NEXT: SI_RETURN implicit $vgpr0 - %masked = call i8 addrspace(3)* @llvm.ptrmask.p3i8.i1(i8 addrspace(3)* %ptr, i1 %mask) - ret i8 addrspace(3)* %masked + %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i1(ptr addrspace(3) %ptr, i1 %mask) + ret ptr addrspace(3) %masked } ; Seems to not work -; define <2 x i8*> @ptrmask_flat_i64_v2(<2 x i8*> %ptr, <2 x i64> %mask) { -; %masked = call <2 x i8*> @llvm.ptrmask.v2p0i8.v2i64(<2 x i8*> %ptr, <2 x i64> %mask) -; ret <2 x i8*> %masked +; define <2 x ptr> @ptrmask_flat_i64_v2(<2 x ptr> %ptr, <2 x i64> 
%mask) { +; %masked = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %ptr, <2 x i64> %mask) +; ret <2 x ptr> %masked ; } -declare i8* @llvm.ptrmask.p0i8.i64(i8*, i64) -declare i8* @llvm.ptrmask.p0i8.i32(i8*, i32) -declare i8* @llvm.ptrmask.p0i8.i16(i8*, i16) -declare i8* @llvm.ptrmask.p0i8.i1(i8*, i1) -declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i64(i8 addrspace(3)*, i64) -declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i32(i8 addrspace(3)*, i32) -declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i16(i8 addrspace(3)*, i16) -declare i8 addrspace(3)* @llvm.ptrmask.p3i8.i1(i8 addrspace(3)*, i1) +declare ptr @llvm.ptrmask.p0.i64(ptr, i64) +declare ptr @llvm.ptrmask.p0.i32(ptr, i32) +declare ptr @llvm.ptrmask.p0.i16(ptr, i16) +declare ptr @llvm.ptrmask.p0.i1(ptr, i1) +declare ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3), i64) +declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32) +declare ptr addrspace(3) @llvm.ptrmask.p3.i16(ptr addrspace(3), i16) +declare ptr addrspace(3) @llvm.ptrmask.p3.i1(ptr addrspace(3), i1) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll index 127430f569e7c..2eab2a458e11e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll @@ -9,9 +9,9 @@ ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.s.getreg) define amdgpu_kernel void @getreg_callsite_attributes() { %reg0 = call i32 @llvm.amdgcn.s.getreg(i32 0) - store volatile i32 %reg0, i32 addrspace(1)* undef + store volatile i32 %reg0, ptr addrspace(1) undef %reg1 = call i32 @llvm.amdgcn.s.getreg(i32 0) #1 - store volatile i32 %reg1, i32 addrspace(1)* undef + store volatile i32 %reg1, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll index 20208f1336625..967979e76dd8a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -32,8 +32,8 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 { ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 - store volatile i32 9, i32 addrspace(5)* %gep + %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5 + store volatile i32 9, ptr addrspace(5) %gep %add0 = add i32 %arg0, %arg1 ret i32 %add0 } @@ -78,8 +78,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 - store volatile i32 9, i32 addrspace(5)* %gep + %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5 + store volatile i32 9, ptr addrspace(5) %gep %ret = tail call fastcc i32 @i32_fastcc_i32_i32(i32 %a, i32 %b) ret i32 %ret } @@ -105,8 +105,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i ; GCN-NEXT: SI_TCRETURN [[GV]](p0), 
@i32_fastcc_i32_i32_stack_object, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 - store volatile i32 9, i32 addrspace(5)* %gep + %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5 + store volatile i32 9, ptr addrspace(5) %gep %ret = tail call fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b) ret i32 %ret } @@ -140,11 +140,9 @@ define amdgpu_kernel void @kernel_call_i32_fastcc_i32_i32_unused_result(i32 %a, ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) - ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x s32>) from %ir.0, align 16, addrspace 4) + ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<2 x s32>) from %ir.a.kernarg.offset1, align 16, addrspace 4) ; GCN-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C]](s32) ; GCN-NEXT: [[EVEC1:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[LOAD]](<2 x s32>), [[C1]](s32) - ; GCN-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[INT]], [[C2]](s64) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 ; GCN-NEXT: $vgpr0 = COPY [[EVEC]](s32) @@ -160,7 +158,7 @@ entry: ret void } -define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* byval(i32) align 4 %arg1) #1 { +define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, ptr addrspace(5) byval(i32) align 4 %arg1) #1 { ; GCN-LABEL: name: i32_fastcc_i32_byval_i32 ; GCN: bb.1 (%ir-block.0): ; GCN-NEXT: liveins: $vgpr0 @@ -172,13 +170,13 @@ define hidden fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* ; GCN-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[LOAD]] ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 - %arg1.load = load i32, i32 addrspace(5)* %arg1, align 4 + %arg1.load = load i32, ptr addrspace(5) %arg1, align 4 %add0 = add i32 %arg0, %arg1.load ret i32 %add0 } ; Tail call disallowed with byval in parent. 
-define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i32 addrspace(5)* byval(i32) %b.byval, i32 %c) #1 { +define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, ptr addrspace(5) byval(i32) %b.byval, i32 %c) #1 { ; GCN-LABEL: name: sibling_call_i32_fastcc_i32_byval_i32_byval_parent ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $vgpr0, $vgpr1 @@ -203,7 +201,7 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32_byval_parent(i32 %a, i3 ; GCN-NEXT: $vgpr0 = COPY [[COPY4]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 entry: - %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) %b.byval) + %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, ptr addrspace(5) byval(i32) %b.byval) ret i32 %ret } @@ -255,13 +253,13 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_byval_i32(i32 %a, [32 x i32] %lar ; GCN-NEXT: [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_byval_i32 ; GCN-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.0 ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `i32 addrspace(5)* inttoptr (i32 16 to i32 addrspace(5)*)`, align 16, addrspace 5) + ; GCN-NEXT: G_MEMCPY [[FRAME_INDEX2]](p5), [[INTTOPTR]](p5), [[C1]](s32), 0 :: (dereferenceable store (s32) into %fixed-stack.0, align 16, addrspace 5), (dereferenceable load (s32) from `ptr addrspace(5) inttoptr (i32 16 to ptr addrspace(5))`, align 16, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: [[COPY31:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3 ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY31]](<4 x s32>) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_byval_i32, 0, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: - %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, i32 addrspace(5)* byval(i32) inttoptr (i32 16 to i32 addrspace(5)*)) + %ret = tail call fastcc i32 @i32_fastcc_i32_byval_i32(i32 %a, ptr addrspace(5) byval(i32) inttoptr (i32 16 to ptr addrspace(5))) ret i32 %ret } @@ -498,8 +496,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 - store volatile i32 9, i32 addrspace(5)* %gep + %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5 + store volatile i32 9, ptr addrspace(5) %gep %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) ret i32 %ret } @@ -693,8 +691,8 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i3 ; GCN-NEXT: SI_TCRETURN 
[[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 - store volatile i32 9, i32 addrspace(5)* %gep + %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5 + store volatile i32 9, ptr addrspace(5) %gep %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] %c) ret i32 %ret } @@ -798,13 +796,13 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @i32_fastcc_i32_i32_a32i32, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3 entry: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 5 - store volatile i32 9, i32 addrspace(5)* %gep + %gep = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 5 + store volatile i32 9, ptr addrspace(5) %gep %ret = tail call fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %a, i32 %b, [32 x i32] zeroinitializer) ret i32 %ret } -declare hidden void @void_fastcc_multi_byval(i32 %a, [3 x i32] addrspace(5)* byval([3 x i32]) align 16, [2 x i64] addrspace(5)* byval([2 x i64])) +declare hidden void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) align 16, ptr addrspace(5) byval([2 x i64])) define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { ; GCN-LABEL: name: sibling_call_fastcc_multi_byval @@ -965,13 +963,13 @@ define fastcc void @sibling_call_fastcc_multi_byval(i32 %a, [64 x i32]) #1 { entry: %alloca0 = alloca [3 x i32], align 16, addrspace(5) %alloca1 = alloca [2 x i64], align 8, addrspace(5) - store [3 x i32] [i32 9, i32 9, i32 9], [3 x i32] addrspace(5)* %alloca0 - store [2 x i64] zeroinitializer, [2 x i64] addrspace(5)* %alloca1 - tail call fastcc void @void_fastcc_multi_byval(i32 %a, [3 x i32] addrspace(5)* byval([3 x i32]) %alloca0, [2 x i64] addrspace(5)* byval([2 x i64]) %alloca1) + store [3 x i32] [i32 9, i32 9, i32 9], ptr addrspace(5) %alloca0 + store [2 x i64] zeroinitializer, ptr addrspace(5) %alloca1 + tail call fastcc void @void_fastcc_multi_byval(i32 %a, ptr addrspace(5) byval([3 x i32]) %alloca0, ptr addrspace(5) byval([2 x i64]) %alloca1) ret void } -declare hidden void @void_fastcc_byval_and_stack_passed([3 x i32] addrspace(5)* 
byval([3 x i32]) align 16, [32 x i32], i32) +declare hidden void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) align 16, [32 x i32], i32) ; Callee has a byval and non-byval stack passed argument define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 x i32]) #1 { @@ -1159,8 +1157,8 @@ define fastcc void @sibling_call_byval_and_stack_passed(i32 %stack.out.arg, [64 ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @void_fastcc_byval_and_stack_passed, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: %alloca = alloca [3 x i32], align 16, addrspace(5) - store [3 x i32] [i32 9, i32 9, i32 9], [3 x i32] addrspace(5)* %alloca - tail call fastcc void @void_fastcc_byval_and_stack_passed([3 x i32] addrspace(5)* byval([3 x i32]) %alloca, [32 x i32] zeroinitializer, i32 %stack.out.arg) + store [3 x i32] [i32 9, i32 9, i32 9], ptr addrspace(5) %alloca + tail call fastcc void @void_fastcc_byval_and_stack_passed(ptr addrspace(5) byval([3 x i32]) %alloca, [32 x i32] zeroinitializer, i32 %stack.out.arg) ret void } @@ -1213,9 +1211,9 @@ entry: ret i64 %ret } -declare hidden fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %arg0) +declare hidden fastcc ptr addrspace(1) @p1i8_fastcc_p1i8(ptr addrspace(1) %arg0) -define hidden fastcc i8 addrspace(1)* @sibling_call_p1i8_fastcc_p1i8(i8 addrspace(1)* %a) #1 { +define hidden fastcc ptr addrspace(1) @sibling_call_p1i8_fastcc_p1i8(ptr addrspace(1) %a) #1 { ; GCN-LABEL: name: sibling_call_p1i8_fastcc_p1i8 ; GCN: bb.1.entry: ; GCN-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr0, $vgpr1, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 @@ -1258,8 +1256,8 @@ define hidden fastcc i8 addrspace(1)* @sibling_call_p1i8_fastcc_p1i8(i8 addrspac ; GCN-NEXT: $vgpr31 = COPY [[COPY19]](s32) ; GCN-NEXT: SI_TCRETURN [[GV]](p0), @p1i8_fastcc_p1i8, 0, csr_amdgpu, implicit $vgpr0, implicit $vgpr1, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 entry: - %ret = tail call fastcc i8 addrspace(1)* @p1i8_fastcc_p1i8(i8 addrspace(1)* %a) - ret i8 addrspace(1)* %ret + %ret = tail call fastcc ptr addrspace(1) @p1i8_fastcc_p1i8(ptr addrspace(1) %a) + ret ptr addrspace(1) %ret } declare hidden fastcc i16 @i16_fastcc_i16(i16 %arg0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll index 403375d6389f8..cb81871be3a58 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll @@ -11,17 +11,16 @@ define 
amdgpu_ps void @test_div_scale(float %arg0, float %arg1) { ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p1) = COPY [[DEF]](p1) ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32), [[INT1:%[0-9]+]]:_(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY]](s32), [[COPY1]](s32), -1 ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[INT1]](s1) - ; CHECK-NEXT: G_STORE [[INT]](s32), [[DEF]](p1) :: (store (s32) into `float addrspace(1)* undef`, addrspace 1) - ; CHECK-NEXT: G_STORE [[SEXT]](s32), [[COPY2]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[INT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) + ; CHECK-NEXT: G_STORE [[SEXT]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 %call = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %arg0, float %arg1, i1 true) %extract0 = extractvalue { float, i1 } %call, 0 %extract1 = extractvalue { float, i1 } %call, 1 %ext = sext i1 %extract1 to i32 - store float %extract0, float addrspace(1)* undef - store i32 %ext, i32 addrspace(1)* undef + store float %extract0, ptr addrspace(1) undef + store i32 %ext, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll index 72c0adcd91ccf..ea0376afe8ec0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-non-entry-func.ll @@ -28,7 +28,7 @@ define void @func_use_lds_global() { ; GFX9-NEXT: ds_write_b32 v0, v0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - store float 0.0, float addrspace(3)* @lds, align 4 + store float 0.0, ptr addrspace(3) @lds, align 4 ret void } @@ -50,6 +50,6 @@ define void @func_use_lds_global_constexpr_cast() { ; GFX9-NEXT: global_store_dword v[0:1], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] - store i32 ptrtoint (float addrspace(3)* @lds to i32), i32 addrspace(1)* undef, align 4 + store i32 ptrtoint (ptr addrspace(3) @lds to i32), ptr addrspace(1) undef, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll index 7bf9e3fcda308..2f718814ef77b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-global-value.ll @@ -5,7 +5,7 @@ @lds_512_4 = internal unnamed_addr addrspace(3) global [128 x i32] undef, align 4 @lds_4_8 = addrspace(3) global i32 undef, align 8 -define amdgpu_kernel void @use_lds_globals(i32 addrspace(1)* %out, i32 addrspace(3)* %in) #0 { +define amdgpu_kernel void @use_lds_globals(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 { ; CHECK-LABEL: use_lds_globals: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -23,11 +23,11 @@ define amdgpu_kernel void @use_lds_globals(i32 addrspace(1)* %out, i32 addrspace ; CHECK-NEXT: ds_write_b32 v0, v3 ; CHECK-NEXT: s_endpgm entry: - %tmp0 = getelementptr [128 x i32], [128 x i32] addrspace(3)* @lds_512_4, i32 0, i32 1 - %tmp1 = load i32, i32 addrspace(3)* %tmp0 - %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1 - store i32 %tmp1, i32 addrspace(1)* %tmp2 - store i32 9, i32 addrspace(3)* @lds_4_8 + %tmp0 = getelementptr [128 x i32], ptr 
addrspace(3) @lds_512_4, i32 0, i32 1 + %tmp1 = load i32, ptr addrspace(3) %tmp0 + %tmp2 = getelementptr i32, ptr addrspace(1) %out, i32 1 + store i32 %tmp1, ptr addrspace(1) %tmp2 + store i32 9, ptr addrspace(3) @lds_4_8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll index fc4c3f710c28f..2921246d9adef 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll @@ -10,17 +10,16 @@ ; GCN-LABEL: test_local_misaligned_v2: ; GCN-DAG: ds_{{read2|load_2addr}}_b32 ; GCN-DAG: ds_{{write2|store_2addr}}_b32 -define amdgpu_kernel void @test_local_misaligned_v2(i32 addrspace(3)* %arg) { +define amdgpu_kernel void @test_local_misaligned_v2(ptr addrspace(3) %arg) { bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid - %ptr = bitcast i32 addrspace(3)* %gep to <2 x i32> addrspace(3)* - %load = load <2 x i32>, <2 x i32> addrspace(3)* %ptr, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid + %load = load <2 x i32>, ptr addrspace(3) %gep, align 4 %v1 = extractelement <2 x i32> %load, i32 0 %v2 = extractelement <2 x i32> %load, i32 1 %v3 = insertelement <2 x i32> undef, i32 %v2, i32 0 %v4 = insertelement <2 x i32> %v3, i32 %v1, i32 1 - store <2 x i32> %v4, <2 x i32> addrspace(3)* %ptr, align 4 + store <2 x i32> %v4, ptr addrspace(3) %gep, align 4 ret void } @@ -31,12 +30,11 @@ bb: ; ALIGNED-DAG: ds_{{write2|store_2addr}}_b32 ; UNALIGNED-DAG: ds_{{read2|load_2addr}}_b64 ; UNALIGNED-DAG: ds_{{write2|store_2addr}}_b64 -define amdgpu_kernel void @test_local_misaligned_v4(i32 addrspace(3)* %arg) { +define amdgpu_kernel void @test_local_misaligned_v4(ptr addrspace(3) %arg) { bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid - %ptr = bitcast i32 addrspace(3)* %gep to <4 x i32> addrspace(3)* - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid + %load = load <4 x i32>, ptr addrspace(3) %gep, align 4 %v1 = extractelement <4 x i32> %load, i32 0 %v2 = extractelement <4 x i32> %load, i32 1 %v3 = extractelement <4 x i32> %load, i32 2 @@ -45,7 +43,7 @@ bb: %v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1 %v7 = insertelement <4 x i32> %v6, i32 %v2, i32 2 %v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3 - store <4 x i32> %v8, <4 x i32> addrspace(3)* %ptr, align 4 + store <4 x i32> %v8, ptr addrspace(3) %gep, align 4 ret void } @@ -56,55 +54,52 @@ bb: ; ALIGNED-DAG: ds_{{write|store}}_b32 ; UNALIGNED-DAG: ds_{{read|load}}_b96 ; UNALIGNED-DAG: ds_{{write|store}}_b96 -define amdgpu_kernel void @test_local_misaligned_v3(i32 addrspace(3)* %arg) { +define amdgpu_kernel void @test_local_misaligned_v3(ptr addrspace(3) %arg) { bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid - %ptr = bitcast i32 addrspace(3)* %gep to <3 x i32> addrspace(3)* - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid + %load = load <3 x i32>, ptr addrspace(3) %gep, align 4 %v1 = extractelement <3 x i32> %load, i32 0 %v2 = extractelement <3 x i32> %load, i32 1 %v3 = extractelement <3 x i32> %load, i32 2 %v5 = insertelement <3 x i32> undef, i32 %v3, i32 0 %v6 = insertelement <3 x i32> %v5, 
i32 %v1, i32 1 %v7 = insertelement <3 x i32> %v6, i32 %v2, i32 2 - store <3 x i32> %v7, <3 x i32> addrspace(3)* %ptr, align 4 + store <3 x i32> %v7, ptr addrspace(3) %gep, align 4 ret void } ; GCN-LABEL: test_local_aligned_v2: ; GCN-DAG: ds_{{read|load}}_b64 ; GCN-DAG: ds_{{write|store}}_b64 -define amdgpu_kernel void @test_local_aligned_v2(i32 addrspace(3)* %arg) { +define amdgpu_kernel void @test_local_aligned_v2(ptr addrspace(3) %arg) { bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid - %ptr = bitcast i32 addrspace(3)* %gep to <2 x i32> addrspace(3)* - %load = load <2 x i32>, <2 x i32> addrspace(3)* %ptr, align 8 + %gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid + %load = load <2 x i32>, ptr addrspace(3) %gep, align 8 %v1 = extractelement <2 x i32> %load, i32 0 %v2 = extractelement <2 x i32> %load, i32 1 %v3 = insertelement <2 x i32> undef, i32 %v2, i32 0 %v4 = insertelement <2 x i32> %v3, i32 %v1, i32 1 - store <2 x i32> %v4, <2 x i32> addrspace(3)* %ptr, align 8 + store <2 x i32> %v4, ptr addrspace(3) %gep, align 8 ret void } ; GCN-LABEL: test_local_aligned_v3: ; GCN-DAG: ds_{{read|load}}_b96 ; GCN-DAG: ds_{{write|store}}_b96 -define amdgpu_kernel void @test_local_aligned_v3(i32 addrspace(3)* %arg) { +define amdgpu_kernel void @test_local_aligned_v3(ptr addrspace(3) %arg) { bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid - %ptr = bitcast i32 addrspace(3)* %gep to <3 x i32> addrspace(3)* - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 16 + %gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid + %load = load <3 x i32>, ptr addrspace(3) %gep, align 16 %v1 = extractelement <3 x i32> %load, i32 0 %v2 = extractelement <3 x i32> %load, i32 1 %v3 = extractelement <3 x i32> %load, i32 2 %v5 = insertelement <3 x i32> undef, i32 %v3, i32 0 %v6 = insertelement <3 x i32> %v5, i32 %v1, i32 1 %v7 = insertelement <3 x i32> %v6, i32 %v2, i32 2 - store <3 x i32> %v7, <3 x i32> addrspace(3)* %ptr, align 16 + store <3 x i32> %v7, ptr addrspace(3) %gep, align 16 ret void } @@ -117,12 +112,11 @@ bb: ; ALIGNED-CU-DAG: ds_{{write2|store_2addr}}_b64 ; UNALIGNED-DAG: ds_{{read2|load_2addr}}_b64 ; UNALIGNED-DAG: ds_{{write2|store_2addr}}_b64 -define amdgpu_kernel void @test_local_v4_aligned8(i32 addrspace(3)* %arg) { +define amdgpu_kernel void @test_local_v4_aligned8(ptr addrspace(3) %arg) { bb: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i32, i32 addrspace(3)* %arg, i32 %lid - %ptr = bitcast i32 addrspace(3)* %gep to <4 x i32> addrspace(3)* - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 8 + %gep = getelementptr inbounds i32, ptr addrspace(3) %arg, i32 %lid + %load = load <4 x i32>, ptr addrspace(3) %gep, align 8 %v1 = extractelement <4 x i32> %load, i32 0 %v2 = extractelement <4 x i32> %load, i32 1 %v3 = extractelement <4 x i32> %load, i32 2 @@ -131,7 +125,7 @@ bb: %v6 = insertelement <4 x i32> %v5, i32 %v3, i32 1 %v7 = insertelement <4 x i32> %v6, i32 %v2, i32 2 %v8 = insertelement <4 x i32> %v7, i32 %v1, i32 3 - store <4 x i32> %v8, <4 x i32> addrspace(3)* %ptr, align 8 + store <4 x i32> %v8, ptr addrspace(3) %gep, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll index 1f76655d935f6..cd536e2336cac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-relocs.ll @@ -14,11 +14,11 @@ ; GCN: .amdgpu_lds lds.defined, 32, 8 define amdgpu_gs float @test_basic(i32 inreg %wave, i32 %arg1) #0 { main_body: - %gep0 = getelementptr [0 x i32], [0 x i32] addrspace(3)* @lds.external, i32 0, i32 %arg1 - %tmp = load i32, i32 addrspace(3)* %gep0 + %gep0 = getelementptr [0 x i32], ptr addrspace(3) @lds.external, i32 0, i32 %arg1 + %tmp = load i32, ptr addrspace(3) %gep0 - %gep1 = getelementptr [8 x i32], [8 x i32] addrspace(3)* @lds.defined, i32 0, i32 %wave - store i32 123, i32 addrspace(3)* %gep1 + %gep1 = getelementptr [8 x i32], ptr addrspace(3) @lds.defined, i32 0, i32 %wave + store i32 123, ptr addrspace(3) %gep1 %r = bitcast i32 %tmp to float ret float %r diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll index 4f45c34bfb53d..156b6b77571ab 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-zero-initializer.ll @@ -8,7 +8,7 @@ @lds = addrspace(3) global [256 x i32] zeroinitializer -define amdgpu_kernel void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 %p) { +define amdgpu_kernel void @load_zeroinit_lds_global(ptr addrspace(1) %out, i1 %p) { ; GCN-LABEL: name: load_zeroinit_lds_global ; GCN: bb.1 (%ir-block.0): ; GCN: liveins: $sgpr0_sgpr1 @@ -31,8 +31,8 @@ define amdgpu_kernel void @load_zeroinit_lds_global(i32 addrspace(1)* %out, i1 % ; GFX9: [[COPY2:%[0-9]+]]:vreg_64 = COPY [[S_LOAD_DWORDX2_IMM]] ; GFX9: FLAT_STORE_DWORD [[COPY2]], [[DS_READ_B32_]], 0, 0, implicit $exec, implicit $flat_scr ; GCN: S_ENDPGM 0 - %gep = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds, i32 0, i32 10 - %ld = load i32, i32 addrspace(3)* %gep - store i32 %ld, i32 addrspace(1)* %out + %gep = getelementptr [256 x i32], ptr addrspace(3) @lds, i32 0, i32 10 + %ld = load i32, ptr addrspace(3) %gep + store i32 %ld, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll index e86e224bc2b32..a8296dbeb2177 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll @@ -3278,7 +3278,7 @@ define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3305,7 +3305,7 @@ define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = 
G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[TRUNC]](s16), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3316,7 +3316,7 @@ main_body: %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i16(i32 15, i16 %s, <8 x i32> %rsrc, i32 1, i32 0) %data = extractvalue { <4 x float>, i32 } %v, 0 %tfe = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %data } @@ -3347,7 +3347,7 @@ define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3379,7 +3379,7 @@ define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s16>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3391,7 +3391,7 @@ main_body: %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, <8 x i32> %rsrc, i32 1, i32 0) %data = extractvalue { <4 x float>, i32 } %v, 0 %tfe = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %data } @@ -3429,7 +3429,7 @@ define amdgpu_ps <4 x 
float> @load_3d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3468,7 +3468,7 @@ define amdgpu_ps <4 x float> @load_3d_tfe(<8 x i32> inreg %rsrc, <2 x i16> %coor ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3481,7 +3481,7 @@ main_body: %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 1, i32 0) %data = extractvalue { <4 x float>, i32 } %v, 0 %tfe = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %data } @@ -3521,7 +3521,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, <2 x i ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX9-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX9-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3562,7 +3562,7 @@ define 
amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, <2 x i ; GFX10NSA-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>) ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2darraymsaa), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX10NSA-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10NSA-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -3576,7 +3576,7 @@ main_body: %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0) %data = extractvalue { <4 x float>, i32 } %v, 0 %tfe = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %data } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll index cbf4c9bcefc50..d52653262d35d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll @@ -244,7 +244,7 @@ define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_f16 @@ -266,13 +266,13 @@ define amdgpu_ps half @image_load_tfe_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; 
PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { half, i32 } %res, 0 %tfe = extractvalue { half, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret half %tex } @@ -303,7 +303,7 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C1]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16 @@ -326,13 +326,13 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16(<8 x i32> inreg %rsrc, i32 %s, ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 %tfe = extractvalue { <2 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <2 x half> %tex } @@ -356,7 +356,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] @@ -394,7 +394,7 @@ define 
amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) ; PACKED-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[BITCAST1]](<2 x s16>) - ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; PACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[BITCAST2]], [[C]] ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -408,7 +408,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16(<8 x i32> inreg %rsrc, i32 %s, %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x half> %tex } @@ -432,7 +432,7 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s16>) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<5 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] @@ -469,14 +469,14 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16(<8 x i32> inreg %rsrc, i32 %s, ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x half> %tex } @@ -959,7 +959,7 @@ define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR 
[[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: $vgpr0 = COPY [[UV]](s32) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_f16_dmask_0000 @@ -981,13 +981,13 @@ define amdgpu_ps half @image_load_tfe_f16_dmask_0000(<8 x i32> inreg %rsrc, i32 ; PACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[UV]](s32) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { half, i32 } %res, 0 %tfe = extractvalue { half, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret half %tex } @@ -1018,7 +1018,7 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_1000 @@ -1041,13 +1041,13 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_1000(<8 x i32> inreg %rs ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; 
PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 %tfe = extractvalue { <2 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <2 x half> %tex } @@ -1078,7 +1078,7 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C2]](s32) ; UNPACKED-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; UNPACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; UNPACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; PACKED-LABEL: name: image_load_tfe_v2f16_dmask_0000 @@ -1101,13 +1101,13 @@ define amdgpu_ps <2 x half> @image_load_tfe_v2f16_dmask_0000(<8 x i32> inreg %rs ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x half>, i32 } %res, 0 %tfe = extractvalue { <2 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <2 x half> %tex } @@ -1131,7 +1131,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] @@ -1166,7 +1166,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs ; PACKED-NEXT: 
[[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) @@ -1178,7 +1178,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1100(<8 x i32> inreg %rs %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x half> %tex } @@ -1202,7 +1202,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1235,7 +1235,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) @@ -1247,7 +1247,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_1000(<8 x i32> inreg %rs %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = 
extractvalue { <3 x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x half> %tex } @@ -1271,7 +1271,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1304,7 +1304,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs ; PACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; PACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; PACKED-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[C]], [[C1]](s32) @@ -1316,7 +1316,7 @@ define amdgpu_ps <3 x half> @image_load_tfe_v3f16_dmask_0000(<8 x i32> inreg %rs %res = call { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x half>, i32 } %res, 0 %tfe = extractvalue { <3 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x half> %tex } @@ -1340,7 +1340,7 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s16>) from custom "ImageResource", align 8) ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<4 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] @@ -1377,14 +1377,14 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1110(<8 x i32> inreg %rs ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV1]](s32) - ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[BITCAST1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x half> %tex } @@ -1408,7 +1408,7 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s16>) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<3 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[UV1]], [[C]] @@ -1444,14 +1444,14 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1100(<8 x i32> inreg %rs ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x half> %tex } @@ -1475,7 +1475,7 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rs ; 
UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1509,14 +1509,14 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_1000(<8 x i32> inreg %rs ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x half> %tex } @@ -1540,7 +1540,7 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rs ; UNPACKED-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; UNPACKED-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD_D16_:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD_D16 intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s16) from custom "ImageResource") ; UNPACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) - ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; UNPACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; UNPACKED-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 ; UNPACKED-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[UV]], [[C]] ; UNPACKED-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -1574,14 +1574,14 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rs ; PACKED-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD_D16_]](<2 x s32>) ; PACKED-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[UV]](s32) ; PACKED-NEXT: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; PACKED-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; PACKED-NEXT: G_STORE [[UV1]](s32), 
[[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; PACKED-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) ; PACKED-NEXT: $vgpr1 = COPY [[DEF1]](<2 x s16>) ; PACKED-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x half>, i32 } %res, 0 %tfe = extractvalue { <4 x half>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x half> %tex } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll index e20e567fae039..46470e44805b1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll @@ -126,13 +126,13 @@ define amdgpu_ps float @image_load_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 %t ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { float, i32 } %res, 0 %tfe = extractvalue { float, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret float %tex } @@ -156,14 +156,14 @@ define amdgpu_ps <2 x float> @image_load_tfe_v2f32(<8 x i32> inreg %rsrc, i32 %s ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) - ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 %tfe = extractvalue { <2 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <2 x float> %tex } @@ -187,7 +187,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32(<8 x i32> inreg %rsrc, i32 %s ; 
GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>) from custom "ImageResource", align 16) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) - ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -195,7 +195,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32(<8 x i32> inreg %rsrc, i32 %s %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 %tfe = extractvalue { <3 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x float> %tex } @@ -219,7 +219,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32(<8 x i32> inreg %rsrc, i32 %s ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<5 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 15, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<5 x s32>) - ; GCN-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV4]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -228,7 +228,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32(<8 x i32> inreg %rsrc, i32 %s %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 %tfe = extractvalue { <4 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %tex } @@ -484,13 +484,13 @@ define amdgpu_ps float @image_load_tfe_f32_dmask_0000(<8 x i32> inreg %rsrc, i32 ; GCN-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32) ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; 
GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %res = call { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { float, i32 } %res, 0 %tfe = extractvalue { float, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret float %tex } @@ -515,14 +515,14 @@ define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_1000(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 %tfe = extractvalue { <2 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <2 x float> %tex } @@ -547,14 +547,14 @@ define amdgpu_ps <2 x float> @image_load_tfe_v2f32_dmask_0000(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1 %res = call { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <2 x float>, i32 } %res, 0 %tfe = extractvalue { <2 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <2 x float> %tex } @@ -579,7 +579,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1100(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; 
GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) @@ -587,7 +587,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1100(<8 x i32> inreg %r %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 %tfe = extractvalue { <3 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x float> %tex } @@ -612,7 +612,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1000(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) @@ -620,7 +620,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_1000(<8 x i32> inreg %r %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 %tfe = extractvalue { <3 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x float> %tex } @@ -645,7 +645,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_0000(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) @@ -653,7 +653,7 @@ define amdgpu_ps <3 x float> @image_load_tfe_v3f32_dmask_0000(<8 x i32> inreg %r %res = call { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <3 x float>, i32 } %res, 0 %tfe = extractvalue { <3 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <3 x float> %tex } @@ -678,7 +678,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1110(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 7, [[BUILD_VECTOR1]](<2 x s32>), $noreg, 
[[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<3 x s32>) from custom "ImageResource", align 16) ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV3]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -687,7 +687,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1110(<8 x i32> inreg %r %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 %tfe = extractvalue { <4 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %tex } @@ -712,7 +712,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1100(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<3 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 3, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (<2 x s32>) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<3 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV2]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) @@ -721,7 +721,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1100(<8 x i32> inreg %r %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 %tfe = extractvalue { <4 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %tex } @@ -746,7 +746,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1000(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) @@ -755,7 +755,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_1000(<8 x i32> inreg %r %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = 
extractvalue { <4 x float>, i32 } %res, 0 %tfe = extractvalue { <4 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %tex } @@ -780,7 +780,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_0000(<8 x i32> inreg %r ; GCN-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.2d), 1, [[BUILD_VECTOR1]](<2 x s32>), $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GCN-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: $vgpr0 = COPY [[UV]](s32) ; GCN-NEXT: $vgpr1 = COPY [[DEF1]](s32) ; GCN-NEXT: $vgpr2 = COPY [[DEF1]](s32) @@ -789,7 +789,7 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_0000(<8 x i32> inreg %r %res = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 0, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { <4 x float>, i32 } %res, 0 %tfe = extractvalue { <4 x float>, i32 } %res, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret <4 x float> %tex } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll index 8421dcf991c94..b5da2f92fdea7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll @@ -57,7 +57,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i3 ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) { +define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) { ; GFX6-LABEL: name: load_2darraymsaa_tfe ; GFX6: bb.1 (%ir-block.0): ; GFX6-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -118,7 +118,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 ad %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll index 8e41a41219776..a62d1b5321409 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll @@ -68,7 +68,7 @@ define amdgpu_ps float @image_load_3d_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 ; GFX6-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR 
[[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32) ; GFX6-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[BUILD_VECTOR1]](<3 x s32>), $noreg, $noreg, [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GFX6-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX6-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX6-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX6-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX6-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; GFX10NSA-LABEL: name: image_load_3d_tfe_f32 @@ -90,13 +90,13 @@ define amdgpu_ps float @image_load_3d_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32 ; GFX10NSA-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF ; GFX10NSA-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<2 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.3d), 1, [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[BUILD_VECTOR]](<8 x s32>), 1, 0, 0 :: (dereferenceable load (s32) from custom "ImageResource") ; GFX10NSA-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<2 x s32>) - ; GFX10NSA-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `i32 addrspace(1)* undef`, addrspace 1) + ; GFX10NSA-NEXT: G_STORE [[UV1]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GFX10NSA-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10NSA-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %val = call { float, i32 } @llvm.amdgcn.image.load.3d.sl_f32i32s.i32(i32 1, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 1, i32 0) %tex = extractvalue { float, i32 } %val, 0 %tfe = extractvalue { float, i32 } %val, 1 - store i32 %tfe, i32 addrspace(1)* undef + store i32 %tfe, ptr addrspace(1) undef ret float %tex } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll index 474d2e4c8418d..9fa0376197ecd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.dec.ll @@ -6,17 +6,17 @@ ; register bank copies, and no return optimization is missing. 
-declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2 -declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2 -declare i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr nocapture, i32, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) nocapture, i64, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr nocapture, i64, i32, i32, i1) #2 declare i32 @llvm.amdgcn.workitem.id.x() #1 -define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_dec_ret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -59,12 +59,12 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 ad ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_dec_ret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -108,13 +108,13 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out + %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_dec_noret_i32(ptr addrspace(3) %ptr) nounwind { ; GCN-LABEL: lds_atomic_dec_noret_i32: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -151,11 +151,11 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) noun ; VI-NEXT: v_mov_b32_e32 v1, s0 ; VI-NEXT: ds_dec_u32 v1, v0 ; VI-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) 
%ptr, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(ptr addrspace(3) %ptr) nounwind { ; CI-LABEL: lds_atomic_dec_noret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -184,12 +184,12 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %pt ; GFX9-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-NEXT: ds_dec_rtn_u32 v0, v0, v1 ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false) + %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_dec_ret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -230,12 +230,12 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_dec_ret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -282,13 +282,13 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %o ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dword v[0:1], v2, off ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void @global_atomic_dec_noret_i32(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_dec_noret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -317,11 +317,11 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) n ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GFX9-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false) + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void 
@global_atomic_dec_noret_i32_offset(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_dec_noret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -356,12 +356,12 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_dec_ret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -428,15 +428,15 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace ; GFX9-NEXT: global_store_dword v[2:3], v0, off ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id - %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out.gep + %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id + %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id + %gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_dec_noret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -486,13 +486,13 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspa ; GFX9-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id - %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id + %gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_ret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -533,12 +533,12 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_store_dword v[0:1], v2 ; GFX9-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 
0, i1 false) - store i32 %result, i32* %out + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32* %out, i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_ret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -585,13 +585,13 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32* %out, i32* %ptr) ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_store_dword v[0:1], v2 ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, i32* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32* %out + %gep = getelementptr i32, ptr %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) nounwind { ; CI-LABEL: flat_atomic_dec_noret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -620,11 +620,11 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32* %ptr) nounwind { ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GFX9-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false) + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) nounwind { ; CI-LABEL: flat_atomic_dec_noret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -659,12 +659,12 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32* %ptr) nounwind ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GFX9-NEXT: s_endpgm - %gep = getelementptr i32, i32* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) + %gep = getelementptr i32, ptr %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_ret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -731,15 +731,15 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32* ; GFX9-NEXT: flat_store_dword v[2:3], v0 ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32* %ptr, i32 %id - %out.gep = getelementptr i32, i32* %out, i32 %id - %gep = getelementptr i32, i32* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32* %out.gep + %gep.tid = getelementptr i32, ptr %ptr, i32 %id + %out.gep = getelementptr i32, ptr %out, i32 %id + %gep = getelementptr i32, ptr %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr %out.gep ret void } -define amdgpu_kernel void 
@flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_noret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -789,13 +789,13 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0 ; GFX9-NEXT: flat_atomic_dec v0, v[0:1], v2 glc ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32* %ptr, i32 %id - %gep = getelementptr i32, i32* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i32, ptr %ptr, i32 %id + %gep = getelementptr i32, ptr %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_ret_i64(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_ret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -839,12 +839,12 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 { ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX9-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64* %out + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_ret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -894,13 +894,13 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr) ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, i64* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64* %out + %gep = getelementptr i64, ptr %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_dec_noret_i64(ptr %ptr) nounwind { ; CI-LABEL: flat_atomic_dec_noret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -932,11 +932,11 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind { ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; GFX9-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false) + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) nounwind { ; CI-LABEL: flat_atomic_dec_noret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -974,12 +974,12 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec_x2 
v[0:1], v[0:1], v[2:3] glc ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, i64* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) + %gep = getelementptr i64, ptr %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_ret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1049,15 +1049,15 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64* ; GFX9-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64* %ptr, i32 %id - %out.gep = getelementptr i64, i64* %out, i32 %id - %gep = getelementptr i64, i64* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64* %out.gep + %gep.tid = getelementptr i64, ptr %ptr, i32 %id + %out.gep = getelementptr i64, ptr %out, i32 %id + %gep = getelementptr i64, ptr %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr %out.gep ret void } -define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #0 { ; CI-LABEL: flat_atomic_dec_noret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1110,15 +1110,15 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0 ; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64* %ptr, i32 %id - %gep = getelementptr i64, i64* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i64, ptr %ptr, i32 %id + %gep = getelementptr i64, ptr %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) ret void } @lds0 = internal addrspace(3) global [512 x i32] undef -define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { +define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 { ; CI-LABEL: atomic_dec_shl_base_lds_0: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1171,14 +1171,14 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 ; GFX9-NEXT: s_endpgm %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 - %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0 - %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false) - store i32 %idx.0, i32 addrspace(1)* %add_use - store i32 %val0, i32 addrspace(1)* %out + %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0 + %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %arrayidx0, i32 9, i32 0, i32 0, i1 false) + store i32 %idx.0, ptr addrspace(1) %add_use + store i32 %val0, ptr addrspace(1) %out ret void } -define amdgpu_kernel 
void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_dec_ret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -1224,12 +1224,12 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 ad ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_dec_ret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -1276,13 +1276,13 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_dec_noret_i64(ptr addrspace(3) %ptr) nounwind { ; CI-LABEL: lds_atomic_dec_noret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -1313,11 +1313,11 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) noun ; GFX9-NEXT: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] ; GFX9-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false) + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(ptr addrspace(3) %ptr) nounwind { ; CI-LABEL: lds_atomic_dec_noret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -1349,12 +1349,12 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %pt ; GFX9-NEXT: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: ds_dec_rtn_u64 v[0:1], v2, v[0:1] ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false) + %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: 
global_atomic_dec_ret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1398,12 +1398,12 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_dec_ret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1453,13 +1453,13 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %o ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void @global_atomic_dec_noret_i64(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_dec_noret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1491,11 +1491,11 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) n ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; GFX9-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false) + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_dec_noret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1533,12 +1533,12 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* ; GFX9-NEXT: v_mov_b32_e32 v1, s1 ; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; GFX9-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) + %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_dec_ret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 
s[0:3], s[4:5], 0x0 @@ -1608,15 +1608,15 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace ; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id - %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out.gep + %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id + %gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_dec_noret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1669,15 +1669,15 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspa ; GFX9-NEXT: flat_atomic_dec_x2 v[0:1], v[0:1], v[2:3] glc ; GFX9-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id - %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id + %gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) ret void } @lds1 = internal addrspace(3) global [512 x i64] undef, align 8 -define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { +define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 { ; CI-LABEL: atomic_dec_shl_base_lds_0_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1733,10 +1733,10 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, ; GFX9-NEXT: s_endpgm %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 - %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0 - %val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false) - store i32 %idx.0, i32 addrspace(1)* %add_use - store i64 %val0, i64 addrspace(1)* %out + %arrayidx0 = getelementptr inbounds [512 x i64], ptr addrspace(3) @lds1, i32 0, i32 %idx.0 + %val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %arrayidx0, i64 9, i32 0, i32 0, i1 false) + store i32 %idx.0, ptr addrspace(1) %add_use + store i64 %val0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll index d041f2e296b1d..1eddf8666e6a2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.atomic.inc.ll @@ -9,17 +9,17 @@ ; register bank copies, and no 
return optimization is missing. -declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2 -declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2 -declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #2 +declare i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr nocapture, i32, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2 -declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) nocapture, i64, i32, i32, i1) #2 +declare i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr nocapture, i64, i32, i32, i1) #2 declare i32 @llvm.amdgcn.workitem.id.x() #1 -define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_inc_ret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -89,8 +89,8 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 ad ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false), !noalias !0 - store i32 %result, i32 addrspace(1)* %out + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false), !noalias !0 + store i32 %result, ptr addrspace(1) %out ret void } @@ -98,7 +98,7 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 ad !1 = distinct !{!1, !2} !2 = distinct !{!2} -define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_inc_ret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -168,13 +168,13 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out + %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_inc_noret_i32(ptr addrspace(3) %ptr) nounwind { ; CI-LABEL: lds_atomic_inc_noret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -220,11 +220,11 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) noun ; GFX11-NEXT: v_dual_mov_b32 v1, 
42 :: v_dual_mov_b32 v0, s0 ; GFX11-NEXT: ds_inc_u32 v0, v1 ; GFX11-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(ptr addrspace(3) %ptr) nounwind { ; CI-LABEL: lds_atomic_inc_noret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -270,12 +270,12 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %pt ; GFX11-NEXT: v_dual_mov_b32 v0, 42 :: v_dual_mov_b32 v1, s0 ; GFX11-NEXT: ds_inc_u32 v1, v0 offset:16 ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false) + %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_ret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -336,12 +336,12 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_ret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -406,13 +406,13 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %o ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void @global_atomic_inc_noret_i32(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_inc_noret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -459,11 +459,11 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) n ; GFX11-NEXT: global_atomic_inc_u32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i32 
@llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false) + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_inc_noret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -514,12 +514,12 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* ; GFX11-NEXT: global_atomic_inc_u32 v1, v0, s[0:1] offset:16 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_ret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -595,15 +595,15 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id - %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id - %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32 addrspace(1)* %out.gep + %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id + %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id + %gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_noret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -661,15 +661,15 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspa ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id - %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id + %gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false) ret void } @lds0 = internal addrspace(3) global [512 x i32] undef, align 4 -define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* 
%add_use) #0 { +define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 { ; CI-LABEL: atomic_inc_shl_base_lds_0_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -745,14 +745,14 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, ; GFX11-NEXT: s_endpgm %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 - %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0 - %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false) - store i32 %idx.0, i32 addrspace(1)* %add_use - store i32 %val0, i32 addrspace(1)* %out + %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0 + %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %arrayidx0, i32 9, i32 0, i32 0, i1 false) + store i32 %idx.0, ptr addrspace(1) %add_use + store i32 %val0, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_inc_ret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -827,12 +827,12 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 ad ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: lds_atomic_inc_ret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s2, s[4:5], 0x2 @@ -907,13 +907,13 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_inc_noret_i64(ptr addrspace(3) %ptr) nounwind { ; CI-LABEL: lds_atomic_inc_noret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -964,11 +964,11 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) noun ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: ds_inc_u64 v2, v[0:1] ; GFX11-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false) + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 
0, i32 0, i1 false) ret void } -define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind { +define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(ptr addrspace(3) %ptr) nounwind { ; CI-LABEL: lds_atomic_inc_noret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s0, s[4:5], 0x0 @@ -1019,12 +1019,12 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %pt ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s0 ; GFX11-NEXT: ds_inc_u64 v2, v[0:1] offset:32 ; GFX11-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false) + %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_ret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1090,12 +1090,12 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_ret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1165,13 +1165,13 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %o ; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out + %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void @global_atomic_inc_noret_i64(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_inc_noret_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1223,11 +1223,11 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) n ; GFX11-NEXT: global_atomic_inc_u64 v2, v[0:1], s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false) + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void 
@global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind { +define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(ptr addrspace(1) %ptr) nounwind { ; CI-LABEL: global_atomic_inc_noret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1283,12 +1283,12 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* ; GFX11-NEXT: global_atomic_inc_u64 v2, v[0:1], s[0:1] offset:32 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) + %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_ret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1369,15 +1369,15 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id - %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id - %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64 addrspace(1)* %out.gep + %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id + %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id + %gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(ptr addrspace(1) %ptr) #0 { ; CI-LABEL: global_atomic_inc_noret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1440,13 +1440,13 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspa ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id - %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id + %gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i32(ptr %out, ptr %ptr) #0 { ; GCN-LABEL: flat_atomic_inc_ret_i32: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1473,12 +1473,12 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 { ; GFX11-NEXT: flat_store_b32 v[0:1], 
v2 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32* %out + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_inc_ret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1553,13 +1553,13 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) ; GFX11-NEXT: flat_store_b32 v[0:1], v2 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32* %out + %gep = getelementptr i32, ptr %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_inc_noret_i32(ptr %ptr) nounwind { ; GCN-LABEL: flat_atomic_inc_noret_i32: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1579,11 +1579,11 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind { ; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false) + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) nounwind { ; CI-LABEL: flat_atomic_inc_noret_i32_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1639,12 +1639,12 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind ; GFX11-NEXT: flat_atomic_inc_u32 v[0:1], v2 offset:16 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) + %gep = getelementptr i32, ptr %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_inc_ret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1746,15 +1746,15 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32* %ptr, i32 %id - %out.gep = getelementptr i32, i32* %out, i32 %id - %gep = getelementptr i32, i32* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32* %out.gep + %gep.tid = getelementptr i32, ptr %ptr, i32 %id + %out.gep = getelementptr i32, ptr %out, 
i32 %id + %gep = getelementptr i32, ptr %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr %out.gep ret void } -define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #0 { ; CI-LABEL: flat_atomic_inc_noret_i32_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -1827,15 +1827,15 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32* %ptr, i32 %id - %gep = getelementptr i32, i32* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i32, ptr %ptr, i32 %id + %gep = getelementptr i32, ptr %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) ret void } @lds1 = internal addrspace(3) global [512 x i64] undef, align 8 -define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { +define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 { ; CI-LABEL: atomic_inc_shl_base_lds_0_i64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1916,14 +1916,14 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, ; GFX11-NEXT: s_endpgm %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 - %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0 - %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false) - store i32 %idx.0, i32 addrspace(1)* %add_use - store i64 %val0, i64 addrspace(1)* %out + %arrayidx0 = getelementptr inbounds [512 x i64], ptr addrspace(3) @lds1, i32 0, i32 %idx.0 + %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %arrayidx0, i64 9, i32 0, i32 0, i1 false) + store i32 %idx.0, ptr addrspace(1) %add_use + store i64 %val0, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i64(ptr %out, ptr %ptr) #0 { ; GCN-LABEL: flat_atomic_inc_ret_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -1952,12 +1952,12 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 { ; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64* %out + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_inc_ret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -2037,13 +2037,13 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) ; GFX11-NEXT: flat_store_b64 v[2:3], v[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm 
- %gep = getelementptr i64, i64* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64* %out + %gep = getelementptr i64, ptr %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr %out ret void } -define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_inc_noret_i64(ptr %ptr) nounwind { ; GCN-LABEL: flat_atomic_inc_noret_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -2065,11 +2065,11 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind { ; GFX11-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false) + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind { +define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) nounwind { ; CI-LABEL: flat_atomic_inc_noret_i64_offset: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -2130,12 +2130,12 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind ; GFX11-NEXT: flat_atomic_inc_u64 v[2:3], v[0:1] offset:32 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i64, i64* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) + %gep = getelementptr i64, ptr %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %ptr) #0 { ; CI-LABEL: flat_atomic_inc_ret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 @@ -2243,15 +2243,15 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64* %ptr, i32 %id - %out.gep = getelementptr i64, i64* %out, i32 %id - %gep = getelementptr i64, i64* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64* %out.gep + %gep.tid = getelementptr i64, ptr %ptr, i32 %id + %out.gep = getelementptr i64, ptr %out, i32 %id + %gep = getelementptr i64, ptr %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr %out.gep ret void } -define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #0 { ; CI-LABEL: flat_atomic_inc_noret_i64_offset_addr64: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -2329,13 +2329,13 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64* %ptr, i32 %id - %gep = getelementptr i64, i64* %gep.tid, i32 5 - %result = call i64 
@llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i64, ptr %ptr, i32 %id + %gep = getelementptr i64, ptr %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) ret void } -define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(3)* %ptr) #0 { +define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(3) %ptr) #0 { ; CI-LABEL: nocse_lds_atomic_inc_ret_i32: ; CI: ; %bb.0: ; CI-NEXT: s_load_dword s6, s[4:5], 0x4 @@ -2422,11 +2422,11 @@ define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, ; GFX11-NEXT: global_store_b32 v1, v0, s[2:3] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) - %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) + %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false) + %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false) - store i32 %result0, i32 addrspace(1)* %out0 - store i32 %result1, i32 addrspace(1)* %out1 + store i32 %result0, ptr addrspace(1) %out0 + store i32 %result1, ptr addrspace(1) %out1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll index 0121d27138a72..278e9b03e84fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.id.ll @@ -9,9 +9,9 @@ declare i64 @llvm.amdgcn.dispatch.id() #1 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s6 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s7 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @dispatch_id(i64 addrspace(1)* %out) #0 { +define amdgpu_kernel void @dispatch_id(ptr addrspace(1) %out) #0 { %tmp0 = call i64 @llvm.amdgcn.dispatch.id() - store i64 %tmp0, i64 addrspace(1)* %out + store i64 %tmp0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll index 38505220392b0..b8cfe23a78802 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll @@ -5,14 +5,13 @@ ; GCN-LABEL: {{^}}test: ; GCN: enable_sgpr_dispatch_ptr = 1 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 -define amdgpu_kernel void @test(i32 addrspace(1)* %out) { - %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 - %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)* - %value = load i32, i32 addrspace(4)* %header_ptr - store i32 %value, i32 addrspace(1)* %out +define amdgpu_kernel void @test(ptr addrspace(1) %out) { + %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 + %value = load i32, ptr addrspace(4) %dispatch_ptr + store i32 %value, ptr addrspace(1) %out ret void } -declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 +declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 attributes #0 = { readnone } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll index 99ef58147896c..d8705bffe7e90 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll @@ -295,7 +295,7 @@ define amdgpu_ps double @s_div_fmas_f64(double inreg %a, double inreg %b, double ret double %result } -define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { +define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa @@ -409,11 +409,11 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32] ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { +define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_0: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s2, s[0:1], 0x13 @@ -516,11 +516,11 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %o ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float 1.0, float %b, float %c, i1 %d) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a, float %b, float %c, [8 x i32], i1 %d) { +define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, float %a, float %b, float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s2, s[0:1], 0x2 @@ -623,11 +623,11 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %o ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float 1.0, float %c, i1 %d) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { +define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { ; GFX7-LABEL: test_div_fmas_f32_inline_imm_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa @@ -730,11 +730,11 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %o ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float 1.0, i1 %d) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, 
align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) { +define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, double %b, double %c, i1 %d) { ; GFX7-LABEL: test_div_fmas_f64: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x0 @@ -843,11 +843,11 @@ define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double % ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d) - store double %result, double addrspace(1)* %out, align 8 + store double %result, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) { +define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %i) { ; GFX7-LABEL: test_div_fmas_f32_cond_to_vcc: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2 @@ -956,11 +956,11 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %ou ; GFX11_W64-NEXT: s_endpgm %cmp = icmp eq i32 %i, 0 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cmp) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { +define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { ; GFX7-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa @@ -1060,11 +1060,11 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 false) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { +define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { ; GFX7-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s2, s[0:1], 0xa @@ -1164,11 +1164,11 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 true) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, float addrspace(1)* %in, [8 x i32], i32 %d) { +define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], i32 %d) { ; GFX7-LABEL: test_div_fmas_f32_logical_cond_to_vcc: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x0 @@ -1326,25 +1326,25 @@ define amdgpu_kernel void 
@test_div_fmas_f32_logical_cond_to_vcc(float addrspace ; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11_W64-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1 - %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2 - %gep.out = getelementptr float, float addrspace(1)* %out, i32 2 + %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.b = getelementptr float, ptr addrspace(1) %gep.a, i32 1 + %gep.c = getelementptr float, ptr addrspace(1) %gep.a, i32 2 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 2 - %a = load volatile float, float addrspace(1)* %gep.a - %b = load volatile float, float addrspace(1)* %gep.b - %c = load volatile float, float addrspace(1)* %gep.c + %a = load volatile float, ptr addrspace(1) %gep.a + %b = load volatile float, ptr addrspace(1) %gep.b + %c = load volatile float, ptr addrspace(1) %gep.c %cmp0 = icmp eq i32 %tid, 0 %cmp1 = icmp ne i32 %d, 0 %and = and i1 %cmp0, %cmp1 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %and) - store float %result, float addrspace(1)* %gep.out, align 4 + store float %result, ptr addrspace(1) %gep.out, align 4 ret void } -define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, [8 x i32], float addrspace(1)* %in, [8 x i32], i32 addrspace(1)* %dummy) { +define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [8 x i32], ptr addrspace(1) %in, [8 x i32], ptr addrspace(1) %dummy) { ; GFX7-LABEL: test_div_fmas_f32_i1_phi_vcc: ; GFX7: ; %bb.0: ; %entry ; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xa @@ -1536,27 +1536,27 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out ; GFX11_W64-NEXT: s_endpgm entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1 - %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2 + %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.b = getelementptr float, ptr addrspace(1) %gep.a, i32 1 + %gep.c = getelementptr float, ptr addrspace(1) %gep.a, i32 2 - %a = load float, float addrspace(1)* %gep.a - %b = load float, float addrspace(1)* %gep.b - %c = load float, float addrspace(1)* %gep.c + %a = load float, ptr addrspace(1) %gep.a + %b = load float, ptr addrspace(1) %gep.b + %c = load float, ptr addrspace(1) %gep.c %cmp0 = icmp eq i32 %tid, 0 br i1 %cmp0, label %bb, label %exit bb: - %val = load i32, i32 addrspace(1)* %dummy + %val = load i32, ptr addrspace(1) %dummy %cmp1 = icmp ne i32 %val, 0 br label %exit exit: %cond = phi i1 [false, %entry], [%cmp1, %bb] - %gep.out = getelementptr float, float addrspace(1)* %out, i32 2 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 2 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cond) - store float %result, float addrspace(1)* %gep.out, align 4 + store float %result, ptr addrspace(1) %gep.out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll index d75b0a8f10081..bc186982968d4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -march=amdgcn 
-mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefix=GFX11 %s -define amdgpu_kernel void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f32_1(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f32_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -75,19 +75,19 @@ define amdgpu_kernel void @test_div_scale_f32_1(float addrspace(1)* %out, float ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, ptr addrspace(1) %gep.0, align 4 + %b = load volatile float, ptr addrspace(1) %gep.1, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f32_2(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f32_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -157,19 +157,19 @@ define amdgpu_kernel void @test_div_scale_f32_2(float addrspace(1)* %out, float ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, ptr addrspace(1) %gep.0, align 4 + %b = load volatile float, ptr addrspace(1) %gep.1, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f64_1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f64_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd @@ -245,19 +245,19 @@ define amdgpu_kernel void @test_div_scale_f64_1(double addrspace(1)* %out, doubl ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile double, double addrspace(1)* %gep.0, align 8 - 
%b = load volatile double, double addrspace(1)* %gep.1, align 8 + %a = load volatile double, ptr addrspace(1) %gep.0, align 8 + %b = load volatile double, ptr addrspace(1) %gep.1, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f64_2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f64_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd @@ -333,19 +333,19 @@ define amdgpu_kernel void @test_div_scale_f64_2(double addrspace(1)* %out, doubl ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile double, double addrspace(1)* %gep.0, align 8 - %b = load volatile double, double addrspace(1)* %gep.1, align 8 + %a = load volatile double, ptr addrspace(1) %gep.0, align 8 + %b = load volatile double, ptr addrspace(1) %gep.1, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, [8 x i32], float %a) { +define amdgpu_kernel void @test_div_scale_f32_scalar_num_1(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], float %a) { ; GFX7-LABEL: test_div_scale_f32_scalar_num_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -408,17 +408,17 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_num_1(float addrspace(1)* % ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid - %b = load float, float addrspace(1)* %gep, align 4 + %b = load float, ptr addrspace(1) %gep, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) { +define amdgpu_kernel void @test_div_scale_f32_scalar_num_2(ptr addrspace(1) %out, ptr addrspace(1) %in, float %a) { ; GFX7-LABEL: test_div_scale_f32_scalar_num_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -481,17 +481,17 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_num_2(float addrspace(1)* % ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep = 
getelementptr float, ptr addrspace(1) %in, i32 %tid - %b = load float, float addrspace(1)* %gep, align 4 + %b = load float, ptr addrspace(1) %gep, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) { +define amdgpu_kernel void @test_div_scale_f32_scalar_den_1(ptr addrspace(1) %out, ptr addrspace(1) %in, float %b) { ; GFX7-LABEL: test_div_scale_f32_scalar_den_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -554,17 +554,17 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_den_1(float addrspace(1)* % ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid - %a = load float, float addrspace(1)* %gep, align 4 + %a = load float, ptr addrspace(1) %gep, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) { +define amdgpu_kernel void @test_div_scale_f32_scalar_den_2(ptr addrspace(1) %out, ptr addrspace(1) %in, float %b) { ; GFX7-LABEL: test_div_scale_f32_scalar_den_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -627,17 +627,17 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_den_2(float addrspace(1)* % ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid - %a = load float, float addrspace(1)* %gep, align 4 + %a = load float, ptr addrspace(1) %gep, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, [8 x i32], double %a) { +define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], double %a) { ; GFX7-LABEL: test_div_scale_f64_scalar_num_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -701,17 +701,17 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(double addrspace(1)* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid - %b = load double, double addrspace(1)* %gep, align 8 + %b = load double, ptr addrspace(1) %gep, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* 
%out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, [8 x i32], double %a) { +define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], double %a) { ; GFX7-LABEL: test_div_scale_f64_scalar_num_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -775,17 +775,17 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(double addrspace(1)* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid - %b = load double, double addrspace(1)* %gep, align 8 + %b = load double, ptr addrspace(1) %gep, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, [8 x i32], double %b) { +define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], double %b) { ; GFX7-LABEL: test_div_scale_f64_scalar_den_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -849,17 +849,17 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(double addrspace(1)* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid - %a = load double, double addrspace(1)* %gep, align 8 + %a = load double, ptr addrspace(1) %gep, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, [8 x i32], double %b) { +define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], double %b) { ; GFX7-LABEL: test_div_scale_f64_scalar_den_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -923,17 +923,17 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(double addrspace(1)* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid - %a = load double, double addrspace(1)* %gep, align 8 + %a = load double, ptr addrspace(1) %gep, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b) { +define amdgpu_kernel void 
@test_div_scale_f32_all_scalar_1(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b) { ; GFX7-LABEL: test_div_scale_f32_all_scalar_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c @@ -986,11 +986,11 @@ define amdgpu_kernel void @test_div_scale_f32_all_scalar_1(float addrspace(1)* % ; GFX11-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b) { +define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b) { ; GFX7-LABEL: test_div_scale_f32_all_scalar_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x1c @@ -1043,11 +1043,11 @@ define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(float addrspace(1)* % ; GFX11-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) { +define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], double %b) { ; GFX7-LABEL: test_div_scale_f64_all_scalar_1: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x1d @@ -1102,11 +1102,11 @@ define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(double addrspace(1)* ; GFX11-NEXT: s_endpgm %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) { +define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], double %b) { ; GFX7-LABEL: test_div_scale_f64_all_scalar_2: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x1d @@ -1161,11 +1161,11 @@ define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(double addrspace(1)* ; GFX11-NEXT: s_endpgm %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f32_inline_imm_num(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f32_inline_imm_num: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1224,16 +1224,16 @@ define amdgpu_kernel void @test_div_scale_f32_inline_imm_num(float addrspace(1)* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %a = load float, float addrspace(1)* %gep.0, 
align 4 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %a = load float, ptr addrspace(1) %gep.0, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 1.0, float %a, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f32_inline_imm_den(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f32_inline_imm_den: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1292,16 +1292,16 @@ define amdgpu_kernel void @test_div_scale_f32_inline_imm_den(float addrspace(1)* ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %a = load float, float addrspace(1)* %gep.0, align 4 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %a = load float, ptr addrspace(1) %gep.0, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float 2.0, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f32_fabs_num(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f32_fabs_num: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1376,21 +1376,21 @@ define amdgpu_kernel void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, ptr addrspace(1) %gep.0, align 4 + %b = load volatile float, ptr addrspace(1) %gep.1, align 4 %a.fabs = call float @llvm.fabs.f32(float %a) %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fabs, float %b, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) { +define amdgpu_kernel void @test_div_scale_f32_fabs_den(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; GFX7-LABEL: test_div_scale_f32_fabs_den: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -1465,21 +1465,21 @@ define amdgpu_kernel void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = 
getelementptr float, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, ptr addrspace(1) %gep.0, align 4 + %b = load volatile float, ptr addrspace(1) %gep.1, align 4 %b.fabs = call float @llvm.fabs.f32(float %b) %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fabs, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_val_undef_val(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_div_scale_f32_val_undef_val(ptr addrspace(1) %out) #0 { ; GFX7-LABEL: test_div_scale_f32_val_undef_val: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1522,11 +1522,11 @@ define amdgpu_kernel void @test_div_scale_f32_val_undef_val(float addrspace(1)* ; GFX11-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_undef_val_val(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_div_scale_f32_undef_val_val(ptr addrspace(1) %out) #0 { ; GFX7-LABEL: test_div_scale_f32_undef_val_val: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1569,11 +1569,11 @@ define amdgpu_kernel void @test_div_scale_f32_undef_val_val(float addrspace(1)* ; GFX11-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(ptr addrspace(1) %out) #0 { ; GFX7-LABEL: test_div_scale_f32_undef_undef_val: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1614,11 +1614,11 @@ define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1) ; GFX11-NEXT: s_endpgm %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @test_div_scale_f64_val_undef_val(double addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_div_scale_f64_val_undef_val(ptr addrspace(1) %out) #0 { ; GFX7-LABEL: test_div_scale_f64_val_undef_val: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 @@ -1667,7 +1667,7 @@ define amdgpu_kernel void @test_div_scale_f64_val_undef_val(double addrspace(1)* ; GFX11-NEXT: s_endpgm %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double undef, i1 false) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll index 75de30e5ddf54..c614f1b6b7aa3 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll @@ -4,7 +4,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX11 %s -define amdgpu_ps float @ds_fadd_f32_ss(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps float @ds_fadd_f32_ss(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fadd_f32_ss: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s2 @@ -36,11 +36,11 @@ define amdgpu_ps float @ds_fadd_f32_ss(float addrspace(3)* inreg %ptr, float inr ; GFX11-NEXT: ds_add_rtn_f32 v0, v0, v1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret } -define amdgpu_ps float @ds_fadd_f32_ss_offset(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps float @ds_fadd_f32_ss_offset(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fadd_f32_ss_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s3 @@ -72,12 +72,12 @@ define amdgpu_ps float @ds_fadd_f32_ss_offset(float addrspace(3)* inreg %ptr, fl ; GFX11-NEXT: ds_add_rtn_f32 v0, v1, v0 offset:512 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret float %ret } -define amdgpu_ps void @ds_fadd_f32_ss_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps void @ds_fadd_f32_ss_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fadd_f32_ss_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s2 @@ -105,11 +105,11 @@ define amdgpu_ps void @ds_fadd_f32_ss_nortn(float addrspace(3)* inreg %ptr, floa ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-NEXT: ds_add_f32 v0, v1 ; GFX11-NEXT: s_endpgm - %unused = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %unused = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret void } -define amdgpu_ps void @ds_fadd_f32_ss_offset_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps void @ds_fadd_f32_ss_offset_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fadd_f32_ss_offset_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s3 @@ -137,12 +137,12 @@ define amdgpu_ps void @ds_fadd_f32_ss_offset_nortn(float addrspace(3)* inreg %pt ; GFX11-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s2 ; GFX11-NEXT: ds_add_f32 v1, v0 offset:512 ; GFX11-NEXT: s_endpgm - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %unused = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %unused = call float @llvm.amdgcn.ds.fadd(ptr 
addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret void } -define float @ds_fadd_f32_vv(float addrspace(3)* %ptr, float %val) { +define float @ds_fadd_f32_vv(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fadd_f32_vv: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -165,11 +165,11 @@ define float @ds_fadd_f32_vv(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_add_rtn_f32 v0, v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret } -define float @ds_fadd_f32_vv_offset(float addrspace(3)* %ptr, float %val) { +define float @ds_fadd_f32_vv_offset(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fadd_f32_vv_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -192,12 +192,12 @@ define float @ds_fadd_f32_vv_offset(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_add_rtn_f32 v0, v0, v1 offset:512 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret float %ret } -define void @ds_fadd_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { +define void @ds_fadd_f32_vv_nortn(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fadd_f32_vv_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -220,11 +220,11 @@ define void @ds_fadd_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_add_f32 v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret void } -define void @ds_fadd_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { +define void @ds_fadd_f32_vv_offset_nortn(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fadd_f32_vv_offset_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -247,12 +247,12 @@ define void @ds_fadd_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_add_f32 v0, v1 offset:512 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret void } -define float @ds_fadd_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { +define float @ds_fadd_f32_vv_volatile(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fadd_f32_vv_volatile: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -275,10 +275,10 @@ define float @ds_fadd_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_add_rtn_f32 v0, v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: 
s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.ds.fadd(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true) + %ret = call float @llvm.amdgcn.ds.fadd(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 true) ret float %ret } -declare float @llvm.amdgcn.ds.fadd(float addrspace(3)* nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0 +declare float @llvm.amdgcn.ds.fadd(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0 attributes #0 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll index 8a11ff509d2a2..e4c4f42b137ef 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll @@ -8,7 +8,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9-MIR %s -define amdgpu_ps float @ds_fmax_f32_ss(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps float @ds_fmax_f32_ss(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmax_f32_ss: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s2 @@ -48,11 +48,11 @@ define amdgpu_ps float @ds_fmax_f32_ss(float addrspace(3)* inreg %ptr, float inr ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret } -define amdgpu_ps float @ds_fmax_f32_ss_offset(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps float @ds_fmax_f32_ss_offset(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmax_f32_ss_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s3 @@ -92,12 +92,12 @@ define amdgpu_ps float @ds_fmax_f32_ss_offset(float addrspace(3)* inreg %ptr, fl ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret float %ret } -define amdgpu_ps void @ds_fmax_f32_ss_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps void @ds_fmax_f32_ss_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmax_f32_ss_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s2 @@ -133,11 +133,11 @@ define amdgpu_ps void @ds_fmax_f32_ss_nortn(float addrspace(3)* inreg %ptr, floa ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: S_ENDPGM 0 - %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, 
i32 0, i32 0, i1 false) + %unused = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret void } -define amdgpu_ps void @ds_fmax_f32_ss_offset_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps void @ds_fmax_f32_ss_offset_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmax_f32_ss_offset_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s3 @@ -173,12 +173,12 @@ define amdgpu_ps void @ds_fmax_f32_ss_offset_nortn(float addrspace(3)* inreg %pt ; GFX9-MIR-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]] ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR-NEXT: S_ENDPGM 0 - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %unused = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret void } -define float @ds_fmax_f32_vv(float addrspace(3)* %ptr, float %val) { +define float @ds_fmax_f32_vv(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmax_f32_vv: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -212,11 +212,11 @@ define float @ds_fmax_f32_vv(float addrspace(3)* %ptr, float %val) { ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN implicit $vgpr0 - %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret } -define float @ds_fmax_f32_vv_offset(float addrspace(3)* %ptr, float %val) { +define float @ds_fmax_f32_vv_offset(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmax_f32_vv_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -250,12 +250,12 @@ define float @ds_fmax_f32_vv_offset(float addrspace(3)* %ptr, float %val) { ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN implicit $vgpr0 - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret float %ret } -define void @ds_fmax_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { +define void @ds_fmax_f32_vv_nortn(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmax_f32_vv_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -287,11 +287,11 @@ define void @ds_fmax_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: SI_RETURN - %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call 
float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret void } -define void @ds_fmax_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { +define void @ds_fmax_f32_vv_offset_nortn(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmax_f32_vv_offset_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -323,12 +323,12 @@ define void @ds_fmax_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { ; GFX9-MIR-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX9-MIR-NEXT: DS_MAX_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3) ; GFX9-MIR-NEXT: SI_RETURN - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret void } -define float @ds_fmax_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { +define float @ds_fmax_f32_vv_volatile(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmax_f32_vv_volatile: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -362,10 +362,10 @@ define float @ds_fmax_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { ; GFX9-MIR-NEXT: [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3) ; GFX9-MIR-NEXT: $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]] ; GFX9-MIR-NEXT: SI_RETURN implicit $vgpr0 - %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true) + %ret = call float @llvm.amdgcn.ds.fmax(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 true) ret float %ret } -declare float @llvm.amdgcn.ds.fmax(float addrspace(3)* nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0 +declare float @llvm.amdgcn.ds.fmax(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0 attributes #0 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll index 29e2e6e4ac96c..b824511c3f5c8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll @@ -4,7 +4,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX11 %s -define amdgpu_ps float @ds_fmin_f32_ss(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps float @ds_fmin_f32_ss(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmin_f32_ss: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s2 @@ -36,11 +36,11 @@ define amdgpu_ps float @ds_fmin_f32_ss(float addrspace(3)* inreg %ptr, float inr ; GFX11-NEXT: ds_min_rtn_f32 v0, v0, v1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret } -define amdgpu_ps float 
@ds_fmin_f32_ss_offset(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps float @ds_fmin_f32_ss_offset(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmin_f32_ss_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s3 @@ -72,12 +72,12 @@ define amdgpu_ps float @ds_fmin_f32_ss_offset(float addrspace(3)* inreg %ptr, fl ; GFX11-NEXT: ds_min_rtn_f32 v0, v1, v0 offset:512 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret float %ret } -define amdgpu_ps void @ds_fmin_f32_ss_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps void @ds_fmin_f32_ss_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmin_f32_ss_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s2 @@ -105,11 +105,11 @@ define amdgpu_ps void @ds_fmin_f32_ss_nortn(float addrspace(3)* inreg %ptr, floa ; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 ; GFX11-NEXT: ds_min_f32 v0, v1 ; GFX11-NEXT: s_endpgm - %unused = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %unused = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret void } -define amdgpu_ps void @ds_fmin_f32_ss_offset_nortn(float addrspace(3)* inreg %ptr, float inreg %val) { +define amdgpu_ps void @ds_fmin_f32_ss_offset_nortn(ptr addrspace(3) inreg %ptr, float inreg %val) { ; GFX8-LABEL: ds_fmin_f32_ss_offset_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, s3 @@ -137,12 +137,12 @@ define amdgpu_ps void @ds_fmin_f32_ss_offset_nortn(float addrspace(3)* inreg %pt ; GFX11-NEXT: v_dual_mov_b32 v0, s3 :: v_dual_mov_b32 v1, s2 ; GFX11-NEXT: ds_min_f32 v1, v0 offset:512 ; GFX11-NEXT: s_endpgm - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %unused = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %unused = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret void } -define float @ds_fmin_f32_vv(float addrspace(3)* %ptr, float %val) { +define float @ds_fmin_f32_vv(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmin_f32_vv: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -165,11 +165,11 @@ define float @ds_fmin_f32_vv(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_min_rtn_f32 v0, v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret float %ret } -define float @ds_fmin_f32_vv_offset(float addrspace(3)* %ptr, float %val) { +define float @ds_fmin_f32_vv_offset(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmin_f32_vv_offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -192,12 +192,12 @@ define float @ds_fmin_f32_vv_offset(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; 
GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret float %ret } -define void @ds_fmin_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { +define void @ds_fmin_f32_vv_nortn(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmin_f32_vv_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -220,11 +220,11 @@ define void @ds_fmin_f32_vv_nortn(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_min_f32 v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false) + %ret = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 false) ret void } -define void @ds_fmin_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { +define void @ds_fmin_f32_vv_offset_nortn(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmin_f32_vv_offset_nortn: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -247,12 +247,12 @@ define void @ds_fmin_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_min_f32 v0, v1 offset:512 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, float addrspace(3)* %ptr, i32 128 - %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false) + %gep = getelementptr float, ptr addrspace(3) %ptr, i32 128 + %ret = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %gep, float %val, i32 0, i32 0, i1 false) ret void } -define float @ds_fmin_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { +define float @ds_fmin_f32_vv_volatile(ptr addrspace(3) %ptr, float %val) { ; GFX8-LABEL: ds_fmin_f32_vv_volatile: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -275,10 +275,10 @@ define float @ds_fmin_f32_vv_volatile(float addrspace(3)* %ptr, float %val) { ; GFX10PLUS-NEXT: ds_min_rtn_f32 v0, v0, v1 ; GFX10PLUS-NEXT: s_waitcnt lgkmcnt(0) ; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true) + %ret = call float @llvm.amdgcn.ds.fmin(ptr addrspace(3) %ptr, float %val, i32 0, i32 0, i1 true) ret float %ret } -declare float @llvm.amdgcn.ds.fmin(float addrspace(3)* nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0 +declare float @llvm.amdgcn.ds.fmin(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0 attributes #0 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll index fd2251bff0b1c..2f558181bf613 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll @@ -47,12 +47,12 @@ entry: br i1 %cond, label %mid, label %bb mid: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb bb: call void @llvm.amdgcn.end.cf.i32(i32 %saved) - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll index 1e39c6395ca7d..81d8472ebd46e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll @@ -25,12 +25,12 @@ entry: br i1 %cond, label %mid, label %bb mid: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb bb: call void @llvm.amdgcn.end.cf.i64(i64 %saved) - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll index 419bdb2076cca..f4a3234c73ee6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX10 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX11 -define i32 @global_atomic_csub(i32 addrspace(1)* %ptr, i32 %data) { +define i32 @global_atomic_csub(ptr addrspace(1) %ptr, i32 %data) { ; GFX10-LABEL: global_atomic_csub: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -19,11 +19,11 @@ define i32 @global_atomic_csub(i32 addrspace(1)* %ptr, i32 %data) { ; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %data) + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %ptr, i32 %data) ret i32 %ret } -define i32 @global_atomic_csub_offset(i32 addrspace(1)* %ptr, i32 %data) { +define i32 @global_atomic_csub_offset(ptr addrspace(1) %ptr, i32 %data) { ; GFX10-LABEL: global_atomic_csub_offset: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -50,12 +50,12 @@ define i32 @global_atomic_csub_offset(i32 addrspace(1)* %ptr, i32 %data) { ; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024 - %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data) + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024 + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %gep, i32 %data) ret i32 %ret } -define void @global_atomic_csub_nortn(i32 addrspace(1)* %ptr, i32 %data) { +define void @global_atomic_csub_nortn(ptr addrspace(1) %ptr, i32 %data) { ; GFX10-LABEL: global_atomic_csub_nortn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -71,11 +71,11 @@ define void @global_atomic_csub_nortn(i32 addrspace(1)* %ptr, i32 %data) { ; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %ptr, i32 %data) + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %ptr, i32 %data) ret void } -define void @global_atomic_csub_offset_nortn(i32 addrspace(1)* %ptr, i32 %data) { +define void @global_atomic_csub_offset_nortn(ptr addrspace(1) %ptr, i32 %data) { 
; GFX10-LABEL: global_atomic_csub_offset_nortn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -102,12 +102,12 @@ define void @global_atomic_csub_offset_nortn(i32 addrspace(1)* %ptr, i32 %data) ; GFX11-NEXT: global_atomic_csub_u32 v0, v[0:1], v2, off glc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024 - %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data) + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024 + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %gep, i32 %data) ret void } -define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset(i32 addrspace(1)* %ptr, i32 %data) { +define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset(ptr addrspace(1) %ptr, i32 %data) { ; GFX10-LABEL: global_atomic_csub_sgpr_base_offset: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 @@ -133,13 +133,13 @@ define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset(i32 addrspace(1)* ; GFX11-NEXT: global_store_b32 v[0:1], v0, off ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024 - %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data) - store i32 %ret, i32 addrspace(1)* undef + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024 + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %gep, i32 %data) + store i32 %ret, ptr addrspace(1) undef ret void } -define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset_nortn(i32 addrspace(1)* %ptr, i32 %data) { +define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset_nortn(ptr addrspace(1) %ptr, i32 %data) { ; GFX10-LABEL: global_atomic_csub_sgpr_base_offset_nortn: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_clause 0x1 @@ -161,12 +161,12 @@ define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset_nortn(i32 addrspa ; GFX11-NEXT: global_atomic_csub_u32 v0, v1, v0, s[0:1] glc ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 1024 - %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* %gep, i32 %data) + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 1024 + %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %gep, i32 %data) ret void } -declare i32 @llvm.amdgcn.global.atomic.csub.p1i32(i32 addrspace(1)* nocapture, i32) #1 +declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #1 attributes #0 = { nounwind willreturn } attributes #1 = { argmemonly nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll index 5681953cb3a62..ba9d5378c2767 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll @@ -1,21 +1,21 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s ; RUN: not --crash llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908 -declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* nocapture, float) -declare <2 x half> @llvm.amdgcn.global.atomic.fadd.f32.p1v2f16.v2f16(<2 x half> 
addrspace(1)* nocapture, <2 x half>) +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) +declare <2 x half> @llvm.amdgcn.global.atomic.fadd.f32.p1.v2f16(ptr addrspace(1) nocapture, <2 x half>) ; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.global.atomic.fadd) ; GFX90A-LABEL: {{^}}global_atomic_fadd_f32_rtn: ; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off glc -define float @global_atomic_fadd_f32_rtn(float addrspace(1)* %ptr, float %data) { - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) +define float @global_atomic_fadd_f32_rtn(ptr addrspace(1) %ptr, float %data) { + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret float %ret } ; GFX90A-LABEL: {{^}}global_atomic_fadd_v2f16_rtn: ; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off glc -define <2 x half> @global_atomic_fadd_v2f16_rtn(<2 x half> addrspace(1)* %ptr, <2 x half> %data) { - %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.f32.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) +define <2 x half> @global_atomic_fadd_v2f16_rtn(ptr addrspace(1) %ptr, <2 x half> %data) { + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.f32.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret <2 x half> %ret } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll index 4af59846cbb51..752ddbb896c6b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX908 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s -define void @global_atomic_fadd_f32(float addrspace(1)* %ptr, float %data) { +define void @global_atomic_fadd_f32(ptr addrspace(1) %ptr, float %data) { ; GFX908-LABEL: global_atomic_fadd_f32: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -16,11 +16,11 @@ define void @global_atomic_fadd_f32(float addrspace(1)* %ptr, float %data) { ; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data) + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data) ret void } -define void @global_atomic_fadd_f32_off_2048(float addrspace(1)* %ptr, float %data) { +define void @global_atomic_fadd_f32_off_2048(ptr addrspace(1) %ptr, float %data) { ; GFX908-LABEL: global_atomic_fadd_f32_off_2048: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -34,12 +34,12 @@ define void @global_atomic_fadd_f32_off_2048(float addrspace(1)* %ptr, float %da ; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:2048 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, float addrspace(1)* %ptr, i64 512 - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %gep, float %data) + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 512 + %ret = call 
float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %gep, float %data) ret void } -define void @global_atomic_fadd_f32_off_neg2047(float addrspace(1)* %ptr, float %data) { +define void @global_atomic_fadd_f32_off_neg2047(ptr addrspace(1) %ptr, float %data) { ; GFX908-LABEL: global_atomic_fadd_f32_off_neg2047: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -53,12 +53,12 @@ define void @global_atomic_fadd_f32_off_neg2047(float addrspace(1)* %ptr, float ; GFX90A-NEXT: global_atomic_add_f32 v[0:1], v2, off offset:-2044 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr float, float addrspace(1)* %ptr, i64 -511 - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %gep, float %data) + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 -511 + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %gep, float %data) ret void } -define amdgpu_kernel void @global_atomic_fadd_f32_off_ss(float addrspace(1)* %ptr, float %data) { +define amdgpu_kernel void @global_atomic_fadd_f32_off_ss(ptr addrspace(1) %ptr, float %data) { ; GFX908-LABEL: global_atomic_fadd_f32_off_ss: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_load_dword s2, s[4:5], 0x8 @@ -78,12 +78,12 @@ define amdgpu_kernel void @global_atomic_fadd_f32_off_ss(float addrspace(1)* %pt ; GFX90A-NEXT: v_mov_b32_e32 v0, s2 ; GFX90A-NEXT: global_atomic_add_f32 v1, v0, s[0:1] offset:2048 ; GFX90A-NEXT: s_endpgm - %gep = getelementptr float, float addrspace(1)* %ptr, i64 512 - %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %gep, float %data) + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 512 + %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %gep, float %data) ret void } -define void @global_atomic_fadd_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) { +define void @global_atomic_fadd_v2f16(ptr addrspace(1) %ptr, <2 x half> %data) { ; GFX908-LABEL: global_atomic_fadd_v2f16: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -97,11 +97,11 @@ define void @global_atomic_fadd_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> ; GFX90A-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] - %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data) ret void } -define void @global_atomic_fadd_v2f16_off_neg2047(<2 x half> addrspace(1)* %ptr, <2 x half> %data) { +define void @global_atomic_fadd_v2f16_off_neg2047(ptr addrspace(1) %ptr, <2 x half> %data) { ; GFX908-LABEL: global_atomic_fadd_v2f16_off_neg2047: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -115,12 +115,12 @@ define void @global_atomic_fadd_v2f16_off_neg2047(<2 x half> addrspace(1)* %ptr, ; GFX90A-NEXT: global_atomic_pk_add_f16 v[0:1], v2, off offset:-2044 ; GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: s_setpc_b64 s[30:31] - %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -511 - %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %gep, <2 x half> %data) + %gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 -511 + %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %gep, <2 x half> 
%data) ret void } -declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* nocapture, float) #0 -declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* nocapture, <2 x half>) #0 +declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) #0 +declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) nocapture, <2 x half>) #0 attributes #0 = { argmemonly nounwind willreturn } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll index cc1cf733efa6e..a1cfcc6c3f4fc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll @@ -38,7 +38,7 @@ define amdgpu_kernel void @test_wave32(i32 %arg0, [8 x i32], i32 %saved) { entry: %cond = icmp eq i32 %arg0, 0 %break = call i32 @llvm.amdgcn.if.break.i32(i1 %cond, i32 %saved) - store volatile i32 %break, i32 addrspace(1)* undef + store volatile i32 %break, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll index 7d2a57643bc3f..9718cef5c6db0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll @@ -20,7 +20,7 @@ define amdgpu_kernel void @test_wave64(i32 %arg0, [8 x i32], i64 %saved) { entry: %cond = icmp eq i32 %arg0, 0 %break = call i64 @llvm.amdgcn.if.break.i64(i1 %cond, i64 %saved) - store volatile i64 %break, i64 addrspace(1)* undef + store volatile i64 %break, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll index 9b77132581fd6..8e02052d7a905 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll @@ -35,7 +35,7 @@ define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s, ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) { +define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t) { ; GFX6-LABEL: load_2d_v4f32_xyzw_tfe: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v5, v0 @@ -119,11 +119,11 @@ define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } -define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) { +define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t) { ; GFX6-LABEL: load_2d_v4f32_xyzw_tfe_lwe: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v5, v0 @@ -207,7 +207,7 @@ define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 
%s, i32 %t, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll index 13677ae7b5e68..cc248964e9914 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll @@ -40,7 +40,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) { +define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) { ; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_mov_b32 s0, s2 @@ -128,11 +128,11 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %r %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } -define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) { +define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) { ; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_mov_b32 s0, s2 @@ -220,7 +220,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inre %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll index 2e3ecd03b9f66..6a7e847528898 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll @@ -35,7 +35,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) { +define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) { ; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v5, v0 @@ -124,11 +124,11 @@ define amdgpu_ps <4 x float> 
@load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %r %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } -define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) { +define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) { ; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v5, v0 @@ -217,7 +217,7 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inre %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll index d6413e956b9da..f831b118285a3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll @@ -38,7 +38,7 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw(<8 x i32> inreg %rsrc, i16 %s, ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i16 %s, i16 %t, i16 %r) { +define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %r) { ; GFX9-LABEL: load_3d_v4f32_xyzw_tfe: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_mov_b32 s0, s2 @@ -123,11 +123,11 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } -define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i16 %s, i16 %t, i16 %r) { +define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %r) { ; GFX9-LABEL: load_3d_v4f32_xyzw_tfe_lwe: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_mov_b32 s0, s2 @@ -212,7 +212,7 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %r, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll index ab0b3bb83a36f..f22877e3c62e2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll @@ -35,7 +35,7 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s, ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %r) { +define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %r) { ; GFX6-LABEL: load_3d_v4f32_xyzw_tfe: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v5, v0 @@ -121,11 +121,11 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, i32 %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 1, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } -define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %r) { +define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %r) { ; GFX6-LABEL: load_3d_v4f32_xyzw_tfe_lwe: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v5, v0 @@ -211,7 +211,7 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0) %v.vec = extractvalue { <4 x float>, i32 } %v, 0 %v.err = extractvalue { <4 x float>, i32 } %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.buffer.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.buffer.ptr.ll index 2028b8abe9606..7d693d8a1bb29 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.buffer.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.buffer.ptr.ll @@ -5,13 +5,12 @@ ; GCN-LABEL: {{^}}test_ps: ; GCN: s_load_dword s{{[0-9]+}}, s[0:1], 0x0 define amdgpu_ps i32 @test_ps() #1 { - %implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() - %buffer_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)* - %value = load volatile i32, i32 addrspace(4)* %buffer_ptr + %implicit_buffer_ptr = call ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() + %value = load volatile i32, ptr addrspace(4) %implicit_buffer_ptr ret i32 %value } -declare i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() #0 +declare ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() #0 attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll index 56d818ed4e0f7..329e6fbae47ad 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll @@ -73,7 +73,7 @@ main_body: ret void } -define amdgpu_ps void @v_interp_f32_many_vm(float addrspace(1)* %ptr, i32 inreg %m0) #0 
{ +define amdgpu_ps void @v_interp_f32_many_vm(ptr addrspace(1) %ptr, i32 inreg %m0) #0 { ; GCN-LABEL: v_interp_f32_many_vm: ; GCN: ; %bb.0: ; %main_body ; GCN-NEXT: global_load_b64 v[0:1], v[0:1], off offset:4 @@ -99,10 +99,10 @@ define amdgpu_ps void @v_interp_f32_many_vm(float addrspace(1)* %ptr, i32 inreg ; GCN-NEXT: exp mrt0 v6, v7, v8, v0 done ; GCN-NEXT: s_endpgm main_body: - %i.ptr = getelementptr float, float addrspace(1)* %ptr, i32 1 - %i = load float, float addrspace(1)* %i.ptr, align 4 - %j.ptr = getelementptr float, float addrspace(1)* %ptr, i32 2 - %j = load float, float addrspace(1)* %j.ptr, align 4 + %i.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 1 + %i = load float, ptr addrspace(1) %i.ptr, align 4 + %j.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 2 + %j = load float, ptr addrspace(1) %j.ptr, align 4 %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0) %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0) %p2 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 2, i32 %m0) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll index 576d71879a3d9..463b59a4fd9f2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll @@ -605,7 +605,7 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node ret <4 x float> %r } -define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr, float* %p_ray, <4 x i32> inreg %tdescr) { +define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(ptr %p_node_ptr, ptr %p_ray, <4 x i32> inreg %tdescr) { ; GFX1030-LABEL: image_bvh_intersect_ray_nsa_reassign: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 @@ -702,10 +702,10 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep_node_ptr = getelementptr inbounds i32, i32* %p_node_ptr, i32 %lid - %node_ptr = load i32, i32* %gep_node_ptr, align 4 - %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid - %ray_extent = load float, float* %gep_ray, align 4 + %gep_node_ptr = getelementptr inbounds i32, ptr %p_node_ptr, i32 %lid + %node_ptr = load i32, ptr %gep_node_ptr, align 4 + %gep_ray = getelementptr inbounds float, ptr %p_ray, i32 %lid + %ray_extent = load float, ptr %gep_ray, align 4 %ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0 %ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1 %ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2 @@ -716,11 +716,11 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr %ray_inv_dir1 = insertelement <3 x float> %ray_inv_dir0, float 7.0, i32 1 %ray_inv_dir = insertelement <3 x float> %ray_inv_dir1, float 8.0, i32 2 %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) - store <4 x i32> %v, <4 x i32>* undef + store <4 x i32> %v, ptr undef ret void } -define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node_ptr, float* %p_ray, <4 x i32> inreg %tdescr) { +define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(ptr %p_node_ptr, ptr %p_ray, <4 x i32> 
inreg %tdescr) { ; GFX1030-LABEL: image_bvh_intersect_ray_a16_nsa_reassign: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24 @@ -842,10 +842,10 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep_node_ptr = getelementptr inbounds i32, i32* %p_node_ptr, i32 %lid - %node_ptr = load i32, i32* %gep_node_ptr, align 4 - %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid - %ray_extent = load float, float* %gep_ray, align 4 + %gep_node_ptr = getelementptr inbounds i32, ptr %p_node_ptr, i32 %lid + %node_ptr = load i32, ptr %gep_node_ptr, align 4 + %gep_ray = getelementptr inbounds float, ptr %p_ray, i32 %lid + %ray_extent = load float, ptr %gep_ray, align 4 %ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0 %ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1 %ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2 @@ -856,11 +856,11 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node %ray_inv_dir1 = insertelement <3 x half> %ray_inv_dir0, half 7.0, i32 1 %ray_inv_dir = insertelement <3 x half> %ray_inv_dir1, half 8.0, i32 2 %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) - store <4 x i32> %v, <4 x i32>* undef + store <4 x i32> %v, ptr undef ret void } -define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray, <4 x i32> inreg %tdescr) { +define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(ptr %p_ray, <4 x i32> inreg %tdescr) { ; GFX1030-LABEL: image_bvh64_intersect_ray_nsa_reassign: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_clause 0x1 @@ -956,8 +956,8 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray, ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid - %ray_extent = load float, float* %gep_ray, align 4 + %gep_ray = getelementptr inbounds float, ptr %p_ray, i32 %lid + %ray_extent = load float, ptr %gep_ray, align 4 %ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0 %ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1 %ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2 @@ -968,11 +968,11 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray, %ray_inv_dir1 = insertelement <3 x float> %ray_inv_dir0, float 7.0, i32 1 %ray_inv_dir = insertelement <3 x float> %ray_inv_dir1, float 8.0, i32 2 %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 1111111111111, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) - store <4 x i32> %v, <4 x i32>* undef + store <4 x i32> %v, ptr undef ret void } -define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_ray, <4 x i32> inreg %tdescr) { +define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray, <4 x i32> inreg %tdescr) { ; GFX1030-LABEL: image_bvh64_intersect_ray_a16_nsa_reassign: ; GFX1030: ; %bb.0: ; GFX1030-NEXT: s_clause 0x1 @@ -1094,8 +1094,8 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_ ; GFX11-NEXT: 
s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid - %ray_extent = load float, float* %gep_ray, align 4 + %gep_ray = getelementptr inbounds float, ptr %p_ray, i32 %lid + %ray_extent = load float, ptr %gep_ray, align 4 %ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0 %ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1 %ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2 @@ -1106,6 +1106,6 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_ %ray_inv_dir1 = insertelement <3 x half> %ray_inv_dir0, half 7.0, i32 1 %ray_inv_dir = insertelement <3 x half> %ray_inv_dir1, half 8.0, i32 2 %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 1111111111110, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) - store <4 x i32> %v, <4 x i32>* undef + store <4 x i32> %v, ptr undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll index 34b07a1d48e5d..b2e4d6787b74c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll @@ -6,7 +6,7 @@ ; TODO: Merge with DAG test -define amdgpu_kernel void @is_private_vgpr(i8* addrspace(1)* %ptr.ptr) { +define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) { ; CI-LABEL: is_private_vgpr: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -70,15 +70,15 @@ define amdgpu_kernel void @is_private_vgpr(i8* addrspace(1)* %ptr.ptr) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i8*, i8* addrspace(1)* %ptr.ptr, i32 %id - %ptr = load volatile i8*, i8* addrspace(1)* %gep - %val = call i1 @llvm.amdgcn.is.private(i8* %ptr) + %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id + %ptr = load volatile ptr, ptr addrspace(1) %gep + %val = call i1 @llvm.amdgcn.is.private(ptr %ptr) %ext = zext i1 %val to i32 - store i32 %ext, i32 addrspace(1)* undef + store i32 %ext, ptr addrspace(1) undef ret void } -define amdgpu_kernel void @is_private_sgpr(i8* %ptr) { +define amdgpu_kernel void @is_private_sgpr(ptr %ptr) { ; CI-LABEL: is_private_sgpr: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -140,11 +140,11 @@ define amdgpu_kernel void @is_private_sgpr(i8* %ptr) { ; GFX11-NEXT: .LBB1_2: ; %bb1 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %val = call i1 @llvm.amdgcn.is.private(i8* %ptr) + %val = call i1 @llvm.amdgcn.is.private(ptr %ptr) br i1 %val, label %bb0, label %bb1 bb0: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb1 bb1: @@ -152,6 +152,6 @@ bb1: } declare i32 @llvm.amdgcn.workitem.id.x() #0 -declare i1 @llvm.amdgcn.is.private(i8* nocapture) #0 +declare i1 @llvm.amdgcn.is.private(ptr nocapture) #0 attributes #0 = { nounwind readnone speculatable } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll index 8aee6d71e20ce..048fb5586ac0d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll @@ -6,7 +6,7 @@ ; 
TODO: Merge with DAG test -define amdgpu_kernel void @is_local_vgpr(i8* addrspace(1)* %ptr.ptr) { +define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) { ; CI-LABEL: is_local_vgpr: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -70,15 +70,15 @@ define amdgpu_kernel void @is_local_vgpr(i8* addrspace(1)* %ptr.ptr) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i8*, i8* addrspace(1)* %ptr.ptr, i32 %id - %ptr = load volatile i8*, i8* addrspace(1)* %gep - %val = call i1 @llvm.amdgcn.is.shared(i8* %ptr) + %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id + %ptr = load volatile ptr, ptr addrspace(1) %gep + %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr) %ext = zext i1 %val to i32 - store i32 %ext, i32 addrspace(1)* undef + store i32 %ext, ptr addrspace(1) undef ret void } -define amdgpu_kernel void @is_local_sgpr(i8* %ptr) { +define amdgpu_kernel void @is_local_sgpr(ptr %ptr) { ; CI-LABEL: is_local_sgpr: ; CI: ; %bb.0: ; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 @@ -140,11 +140,11 @@ define amdgpu_kernel void @is_local_sgpr(i8* %ptr) { ; GFX11-NEXT: .LBB1_2: ; %bb1 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %val = call i1 @llvm.amdgcn.is.shared(i8* %ptr) + %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr) br i1 %val, label %bb0, label %bb1 bb0: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb1 bb1: @@ -152,6 +152,6 @@ bb1: } declare i32 @llvm.amdgcn.workitem.id.x() #0 -declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #0 +declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0 attributes #0 = { nounwind readnone speculatable } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll index ae04ab5ca9a71..3c049b4ee0ac9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll @@ -10,12 +10,11 @@ ; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa ; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa -define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 { - %kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() - %header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)* - %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10 - %value = load i32, i32 addrspace(4)* %gep - store i32 %value, i32 addrspace(1)* %out +define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 { + %kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() + %gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10 + %value = load i32, ptr addrspace(4) %gep + store i32 %value, ptr addrspace(1) %out ret void } @@ -27,12 +26,11 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 { ; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15 ; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, 0x15 -define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 { - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %header.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10 - %value = load i32, i32 addrspace(4)* %gep - store i32 %value, i32 
addrspace(1)* %out +define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 { + %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr i32, ptr addrspace(4) %implicitarg.ptr, i64 10 + %value = load i32, ptr addrspace(4) %gep + store i32 %value, ptr addrspace(1) %out ret void } @@ -47,11 +45,10 @@ define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 { ; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3 ; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]] ; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]] -define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x i8> %in) #1 { - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val = load i32, i32 addrspace(4)* %arg.ptr - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #1 { + %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %val = load i32, ptr addrspace(4) %implicitarg.ptr + store i32 %val, ptr addrspace(1) %out ret void } @@ -66,11 +63,10 @@ define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x ; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3 ; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]] ; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]] -define amdgpu_kernel void @opencl_test_implicit_alignment(i32 addrspace(1)* %out, <2 x i8> %in) #2 { - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val = load i32, i32 addrspace(4)* %arg.ptr - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #2 { + %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %val = load i32, ptr addrspace(4) %implicitarg.ptr + store i32 %val, ptr addrspace(1) %out ret void } @@ -82,11 +78,10 @@ define amdgpu_kernel void @opencl_test_implicit_alignment(i32 addrspace(1)* %out ; HSA: s_mov_b64 [[OFFSET_NULL:s\[[0-9]+:[0-9]+\]]], 40{{$}} ; HSA: s_load_dword s{{[0-9]+}}, [[OFFSET_NULL]] define amdgpu_kernel void @test_no_kernargs() #1 { - %kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() - %header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)* - %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10 - %value = load i32, i32 addrspace(4)* %gep - store volatile i32 %value, i32 addrspace(1)* undef + %kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() + %gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10 + %value = load i32, ptr addrspace(4) %gep + store volatile i32 %value, ptr addrspace(1) undef ret void } @@ -95,10 +90,9 @@ define amdgpu_kernel void @test_no_kernargs() #1 { ; OS-MESA3D: kernarg_segment_byte_size = 16 ; CO-V2: kernarg_segment_alignment = 4 define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 { - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val = load volatile i32, i32 addrspace(4)* %arg.ptr - store volatile i32 %val, i32 addrspace(1)* null + %implicitarg.ptr = call noalias ptr addrspace(4) 
@llvm.amdgcn.implicitarg.ptr() + %val = load volatile i32, ptr addrspace(4) %implicitarg.ptr + store volatile i32 %val, ptr addrspace(1) null ret void } @@ -107,23 +101,22 @@ define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() ; OS-MESA3D: kernarg_segment_byte_size = 16 ; CO-V2: kernarg_segment_alignment = 4 define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 { - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val = load volatile i32, i32 addrspace(4)* %arg.ptr - store volatile i32 %val, i32 addrspace(1)* null + %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %val = load volatile i32, ptr addrspace(4) %implicitarg.ptr + store volatile i32 %val, ptr addrspace(1) null ret void } ; ALL-LABEL: {{^}}func_kernarg_segment_ptr: ; ALL: v_mov_b32_e32 v0, 0{{$}} ; ALL: v_mov_b32_e32 v1, 0{{$}} -define i8 addrspace(4)* @func_kernarg_segment_ptr() { - %ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() - ret i8 addrspace(4)* %ptr +define ptr addrspace(4) @func_kernarg_segment_ptr() { + %ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() + ret ptr addrspace(4) %ptr } -declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0 -declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0 +declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0 +declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0 attributes #0 = { nounwind readnone } attributes #1 = { nounwind "amdgpu-implicitarg-num-bytes"="0" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll index c0b5f13139434..aecfbe7aa2260 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll @@ -10,7 +10,7 @@ declare <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double, double, <4 x doubl declare double @llvm.amdgcn.mfma.f64.4x4x4f64(double, double, double, i32, i32, i32) declare i32 @llvm.amdgcn.workitem.id.x() -define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: test_mfma_f32_32x32x4bf16_1k: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[34:35], s[0:1], 0x24 @@ -70,15 +70,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(<32 x float> addrspace(1 ; GCN-NEXT: global_store_dwordx4 v0, a[28:31], s[34:35] offset:112 ; GCN-NEXT: s_endpgm bb: - %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg + %in.1 = load <32 x float>, ptr addrspace(1) %arg %a = bitcast i64 1 to <4 x i16> %b = bitcast i64 2 to <4 x i16> %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <32 x float> %in.1, i32 1, i32 2, i32 3) - store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg + store <32 x float> %mai.1, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: test_mfma_f32_16x16x4bf16_1k: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24 @@ -116,15 +116,15 @@ define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1 ; GCN-NEXT: 
global_store_dwordx4 v0, a[12:15], s[16:17] offset:48 ; GCN-NEXT: s_endpgm bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %a = bitcast i64 1 to <4 x i16> %b = bitcast i64 2 to <4 x i16> %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: test_mfma_f32_4x4x4bf16_1k: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 @@ -146,15 +146,15 @@ define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)* ; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[4:5] ; GCN-NEXT: s_endpgm bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %a = bitcast i64 1 to <4 x i16> %b = bitcast i64 2 to <4 x i16> %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: test_mfma_f32_32x32x8bf16_1k: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[16:17], s[0:1], 0x24 @@ -193,15 +193,15 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1 ; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48 ; GCN-NEXT: s_endpgm bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %a = bitcast i64 1 to <4 x i16> %b = bitcast i64 2 to <4 x i16> %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8bf16.1k(<4 x i16> %a, <4 x i16> %b, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg) #0 { ; GCN-LABEL: test_mfma_f32_16x16x16bf16_1k: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 @@ -224,15 +224,15 @@ define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1 ; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[4:5] ; GCN-NEXT: s_endpgm bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %a = bitcast i64 1 to <4 x i16> %b = bitcast i64 2 to <4 x i16> %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16bf16.1k(<4 x i16> %a, <4 x i16> %b, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f64_4x4x4f64(double addrspace(1)* %arg, double %a, double %b) #0 { +define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double %a, double %b) #0 { ; GCN-LABEL: test_mfma_f64_4x4x4f64: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 @@ 
-251,11 +251,11 @@ define amdgpu_kernel void @test_mfma_f64_4x4x4f64(double addrspace(1)* %arg, dou bb: %mai.1 = tail call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %a, double %b, double 0.0, i32 0, i32 0, i32 0) %mai.2 = tail call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %a, double %b, double %mai.1, i32 1, i32 2, i32 3) - store double %mai.2, double addrspace(1)* %arg + store double %mai.2, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f64_16x16x4f64(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 { +define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, double %a, double %b) #0 { ; GCN-LABEL: test_mfma_f64_16x16x4f64: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24 @@ -283,13 +283,13 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(<4 x double> addrspace(1)* % ; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[8:9] offset:16 ; GCN-NEXT: s_endpgm bb: - %in.1 = load <4 x double>, <4 x double> addrspace(1)* %arg + %in.1 = load <4 x double>, ptr addrspace(1) %arg %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> %in.1, i32 1, i32 2, i32 3) - store <4 x double> %mai.1, <4 x double> addrspace(1)* %arg + store <4 x double> %mai.1, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 { +define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(ptr addrspace(1) %arg, double %a, double %b) #0 { ; GCN-LABEL: test_mfma_f64_16x16x4f64_splat_imm: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 @@ -310,11 +310,11 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(<4 x double> addrs bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> , i32 0, i32 0, i32 0) %mai.2 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> %mai.1, i32 1, i32 2, i32 3) - store <4 x double> %mai.2, <4 x double> addrspace(1)* %arg + store <4 x double> %mai.2, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 { +define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, double %a, double %b) #0 { ; GCN-LABEL: test_mfma_f64_16x16x4f64_imm: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x24 @@ -345,11 +345,11 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(<4 x double> addrspace(1 ; GCN-NEXT: s_endpgm bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> , i32 0, i32 0, i32 0) - store <4 x double> %mai.1, <4 x double> addrspace(1)* %arg + store <4 x double> %mai.1, ptr addrspace(1) %arg ret void } -define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 { +define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %arg, double %a, double %b) #0 { ; GCN-LABEL: test_mfma_f64_16x16x4f64_splat_lit: ; GCN: ; %bb.0: ; %bb ; GCN-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x24 @@ -381,7 +381,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(<4 x double> addrs ; GCN-NEXT: s_endpgm bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> , i32 0, i32 0, i32 0) - store <4 x double> %mai.1, <4 x double> addrspace(1)* %arg + store <4 x double> 
%mai.1, ptr addrspace(1) %arg ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll index 178b60463f1d5..820b5907569d4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll @@ -5,7 +5,7 @@ ; FIXME: Merge with DAG test -define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in) { ; GFX8-LABEL: dpp_test: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s2, s[0:1], 0x2c @@ -43,10 +43,10 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 true) #0 - store i32 %tmp0, i32 addrspace(1)* %out + store i32 %tmp0, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @mov_dpp64_test(i64 addrspace(1)* %out, i64 %in1) { +define amdgpu_kernel void @mov_dpp64_test(ptr addrspace(1) %out, i64 %in1) { ; GFX8-LABEL: mov_dpp64_test: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -84,7 +84,7 @@ define amdgpu_kernel void @mov_dpp64_test(i64 addrspace(1)* %out, i64 %in1) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; encoding: [0x03,0x00,0xb6,0xbf] ; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] %tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 %in1, i32 1, i32 1, i32 1, i1 false) #0 - store i64 %tmp0, i64 addrspace(1)* %out + store i64 %tmp0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll index 4f51c551edf30..704e0a2a37e87 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll @@ -5,14 +5,13 @@ ; GCN-LABEL: {{^}}test: ; GCN: enable_sgpr_queue_ptr = 1 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 -define amdgpu_kernel void @test(i32 addrspace(1)* %out) { - %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 - %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)* - %value = load i32, i32 addrspace(4)* %header_ptr - store i32 %value, i32 addrspace(1)* %out +define amdgpu_kernel void @test(ptr addrspace(1) %out) { + %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0 + %value = load i32, ptr addrspace(4) %queue_ptr + store i32 %value, ptr addrspace(1) %out ret void } -declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 +declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0 attributes #0 = { nounwind readnone } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll index 9120f53cb5565..4ac30eb5c28e9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll @@ -11,7 +11,7 @@ declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i3 define amdgpu_kernel void @buffer_atomic_add_f32_rtn(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 %soffset) { main_body: %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 
%voffset, i32 %soffset, i32 0) - store float %ret, float* undef + store float %ret, ptr undef ret void } @@ -20,6 +20,6 @@ main_body: define amdgpu_kernel void @buffer_atomic_add_v2f16_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { main_body: %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) - store <2 x half> %ret, <2 x half>* undef + store <2 x half> %ret, ptr undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll index 7ad408c1f547e..bf09de7d62e25 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll @@ -5105,5 +5105,5 @@ declare <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32>, i32, i32 immarg) declare <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32>, i32, i32 immarg) declare <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32>, i32, i32 immarg) -declare <4 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32>, i32, i32 immarg) -declare <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32>, i32, i32 immarg) +declare <4 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v4p1(<4 x i32>, i32, i32 immarg) +declare <8 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v8p1(<4 x i32>, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll index 32c80c8e086e5..a2aa740410ab9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll @@ -41,7 +41,7 @@ define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i3 ret i64 %bfe_i32 } -define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { +define amdgpu_kernel void @bfe_i32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_i32_arg_arg_imm: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -55,11 +55,11 @@ define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 123) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 { +define amdgpu_kernel void @bfe_i32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 { ; GFX6-LABEL: bfe_i32_arg_imm_arg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -73,11 +73,11 @@ define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 123, i32 %src2) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 { +define amdgpu_kernel void @bfe_i32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 { ; GFX6-LABEL: bfe_i32_imm_arg_arg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -92,11 +92,11 @@ define amdgpu_kernel void 
@bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 123, i32 %src1, i32 %src2) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) #0 { +define amdgpu_kernel void @v_bfe_print_arg(ptr addrspace(1) %out, ptr addrspace(1) %src0) #0 { ; GFX6-LABEL: v_bfe_print_arg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -109,13 +109,13 @@ define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %src0, align 4 + %load = load i32, ptr addrspace(1) %src0, align 4 %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 2, i32 8) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { +define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_i32_arg_0_width_reg_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -128,11 +128,11 @@ define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 0) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { +define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_i32_arg_0_width_imm_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 @@ -145,11 +145,11 @@ define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 8, i32 0) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_6: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -163,14 +163,14 @@ define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 1, i32 31) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_7: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 
@@ -184,14 +184,14 @@ define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 0, i32 31) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -205,14 +205,14 @@ define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_9: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -225,13 +225,13 @@ define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_10: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -244,13 +244,13 @@ define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 1, i32 31) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_11: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -263,13 +263,13 @@ define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 8, i32 24) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 
ret void } -define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_12: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -282,13 +282,13 @@ define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 24, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_13: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -302,13 +302,13 @@ define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = ashr i32 %x, 31 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_i32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_i32_test_14: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -322,13 +322,13 @@ define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = lshr i32 %x, 31 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_0(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_0: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -340,11 +340,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 0) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_1(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_1: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -356,11 +356,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 12334, i32 0, i32 0) - store i32 %bfe_i32, i32 
addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_2(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_2: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -372,11 +372,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 1) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_3(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_3: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -388,11 +388,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 1, i32 0, i32 1) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_4(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_4: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -404,11 +404,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 0, i32 1) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_5(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_5: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -421,11 +421,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 7, i32 1) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_6(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_6: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -438,11 +438,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 0, i32 8) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_7(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_7: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0x0 @@ -455,11 +455,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 0, i32 8) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_8(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -472,11 +472,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 6, i32 8) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_9(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_9: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -489,11 +489,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65536, i32 16, i32 8) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_10(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_10: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -506,11 +506,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65535, i32 16, i32 16) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_11(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_11: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -523,11 +523,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 4) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_12(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_12: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -540,11 +540,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 31, i32 1) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) 
%out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_13(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_13: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -557,11 +557,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 131070, i32 16, i32 16) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_14(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_14: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -574,11 +574,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 2, i32 30) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_15(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_15: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -591,11 +591,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 28) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_16(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -607,11 +607,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 1, i32 7) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_17(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_17: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -624,11 +624,11 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 1, i32 31) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_i32_constant_fold_test_18(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_i32_constant_fold_test_18: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -641,11 +641,11 @@ 
define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 31, i32 1) - store i32 %bfe_i32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_i32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_sext_in_reg_i24(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_sext_in_reg_i24: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -659,15 +659,15 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrs ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 0, i32 24) %shl = shl i32 %bfe, 8 %ashr = ashr i32 %shl, 8 - store i32 %ashr, i32 addrspace(1)* %out, align 4 + store i32 %ashr, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @simplify_demanded_bfe_sdiv(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: simplify_demanded_bfe_sdiv: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, 2.0 @@ -701,14 +701,14 @@ define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i3 ; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm - %src = load i32, i32 addrspace(1)* %in, align 4 + %src = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16) %div = sdiv i32 %bfe, 2 - store i32 %div, i32 addrspace(1)* %out, align 4 + store i32 %div, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @bfe_0_width(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; GFX6-LABEL: bfe_0_width: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -721,13 +721,13 @@ define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %ptr, align 4 + %load = load i32, ptr addrspace(1) %ptr, align 4 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 8, i32 0) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @bfe_8_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; GFX6-LABEL: bfe_8_bfe_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -741,14 +741,14 @@ define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %ptr, align 4 + %load = load i32, ptr addrspace(1) %ptr, align 4 %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8) %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8) - store i32 %bfe1, i32 addrspace(1)* %out, align 4 + store i32 %bfe1, ptr 
addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @bfe_8_bfe_16(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; GFX6-LABEL: bfe_8_bfe_16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -762,15 +762,15 @@ define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1) ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %ptr, align 4 + %load = load i32, ptr addrspace(1) %ptr, align 4 %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8) %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 16) - store i32 %bfe1, i32 addrspace(1)* %out, align 4 + store i32 %bfe1, ptr addrspace(1) %out, align 4 ret void } ; This really should be folded into 1 -define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @bfe_16_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; GFX6-LABEL: bfe_16_bfe_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -784,15 +784,15 @@ define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1) ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %ptr, align 4 + %load = load i32, ptr addrspace(1) %ptr, align 4 %bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 16) %bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8) - store i32 %bfe1, i32 addrspace(1)* %out, align 4 + store i32 %bfe1, ptr addrspace(1) %out, align 4 ret void } ; Make sure there isn't a redundant BFE -define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { ; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -809,11 +809,11 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 0, i32 8) %shl = shl i32 %bfe, 24 %ashr = ashr i32 %shl, 24 - store i32 %ashr, i32 addrspace(1)* %out, align 4 + store i32 %ashr, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { ; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe_wrong: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -830,11 +830,11 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %ou %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 8, i32 0) %shl = shl i32 %bfe, 24 %ashr = ashr i32 %shl, 24 - store i32 %ashr, i32 addrspace(1)* %out, align 4 + store i32 %ashr, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @sextload_i8_to_i32_bfe(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; GFX6-LABEL: sextload_i8_to_i32_bfe: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -849,16 +849,16 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 add ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 
; GFX6-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %ptr, align 1 + %load = load i8, ptr addrspace(1) %ptr, align 1 %sext = sext i8 %load to i32 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 0, i32 8) %shl = shl i32 %bfe, 24 %ashr = ashr i32 %shl, 24 - store i32 %ashr, i32 addrspace(1)* %out, align 4 + store i32 %ashr, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { ; GFX6-LABEL: sextload_i8_to_i32_bfe_0: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -873,16 +873,16 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 a ; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %ptr, align 1 + %load = load i8, ptr addrspace(1) %ptr, align 1 %sext = sext i8 %load to i32 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 8, i32 0) %shl = shl i32 %bfe, 24 %ashr = ashr i32 %shl, 24 - store i32 %ashr, i32 addrspace(1)* %out, align 4 + store i32 %ashr, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: sext_in_reg_i1_bfe_offset_0: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -896,15 +896,15 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %shr = ashr i32 %shl, 31 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 0, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: sext_in_reg_i1_bfe_offset_1: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -918,15 +918,15 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 30 %shr = ashr i32 %shl, 30 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: sext_in_reg_i2_bfe_offset_1: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -940,11 +940,11 @@ define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr 
addrspace(1) %in, align 4 %shl = shl i32 %x, 30 %shr = ashr i32 %shl, 30 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 2) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll index d9061eb009df7..95ede542ddd7f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -define amdgpu_kernel void @set_inactive(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @set_inactive(ptr addrspace(1) %out, i32 %in) { ; GCN-LABEL: set_inactive: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c @@ -16,11 +16,11 @@ define amdgpu_kernel void @set_inactive(i32 addrspace(1)* %out, i32 %in) { ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GCN-NEXT: s_endpgm %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0 - store i32 %tmp, i32 addrspace(1)* %out + store i32 %tmp, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @set_inactive_64(i64 addrspace(1)* %out, i64 %in) { +define amdgpu_kernel void @set_inactive_64(ptr addrspace(1) %out, i64 %in) { ; GCN-LABEL: set_inactive_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -36,11 +36,11 @@ define amdgpu_kernel void @set_inactive_64(i64 addrspace(1)* %out, i64 %in) { ; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GCN-NEXT: s_endpgm %tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0) #0 - store i64 %tmp, i64 addrspace(1)* %out + store i64 %tmp, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4 x i32> inreg %desc) { +define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x i32> inreg %desc) { ; GCN-LABEL: set_inactive_scc: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 @@ -83,12 +83,12 @@ define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4 br i1 %cmp, label %.zero, label %.one .zero: - store i32 %tmp, i32 addrspace(1)* %out + store i32 %tmp, ptr addrspace(1) %out br label %.exit .one: %tmp.1 = add i32 %tmp, 1 - store i32 %tmp.1, i32 addrspace(1)* %out + store i32 %tmp.1, ptr addrspace(1) %out br label %.exit .exit: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll index 328f2e2b121bb..6d9f5d7143254 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll @@ -12,7 +12,7 @@ declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x define amdgpu_kernel void @buffer_atomic_add_f32_rtn(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) { main_body: %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) - store float %ret, float* undef + store float %ret, ptr undef ret void } @@ -21,6 +21,6 @@ main_body: define amdgpu_kernel void 
@buffer_atomic_add_v2f16_rtn(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset) { main_body: %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) - store <2 x half> %ret, <2 x half>* undef + store <2 x half> %ret, ptr undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll index c13ff4c2cb2b2..a1b47b440d72e 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll @@ -214,7 +214,7 @@ define amdgpu_ps float @struct_buffer_load_format_i32__sgpr_rsrc__vgpr_vindex__v ret float %fval } -define amdgpu_cs void @struct_buffer_load_format_v4i32_tfe(<4 x i32> inreg %rsrc, <4 x i32> addrspace(1)* %value, i32 addrspace(1)* %status) { +define amdgpu_cs void @struct_buffer_load_format_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %value, ptr addrspace(1) %status) { ; CHECK-LABEL: name: struct_buffer_load_format_v4i32_tfe ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -245,15 +245,15 @@ define amdgpu_cs void @struct_buffer_load_format_v4i32_tfe(<4 x i32> inreg %rsrc %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %v = extractvalue { <4 x i32>, i32 } %load, 0 - store <4 x i32> %v, <4 x i32> addrspace(1)* %value + store <4 x i32> %v, ptr addrspace(1) %value %s = extractvalue { <4 x i32>, i32 } %load, 1 - store i32 %s, i32 addrspace(1)* %status + store i32 %s, ptr addrspace(1) %status ret void } -define amdgpu_cs void @struct_buffer_load_format_v3i32_tfe(<4 x i32> inreg %rsrc, <3 x i32> addrspace(1)* %value, i32 addrspace(1)* %status) { +define amdgpu_cs void @struct_buffer_load_format_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %value, ptr addrspace(1) %status) { ; CHECK-LABEL: name: struct_buffer_load_format_v3i32_tfe ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -283,15 +283,15 @@ define amdgpu_cs void @struct_buffer_load_format_v3i32_tfe(<4 x i32> inreg %rsrc %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %v = extractvalue { <3 x i32>, i32 } %load, 0 - store <3 x i32> %v, <3 x i32> addrspace(1)* %value + store <3 x i32> %v, ptr addrspace(1) %value %s = extractvalue { <3 x i32>, i32 } %load, 1 - store i32 %s, i32 addrspace(1)* %status + store i32 %s, ptr addrspace(1) %status ret void } -define amdgpu_cs void @struct_buffer_load_format_i32_tfe(<4 x i32> inreg %rsrc, i32 addrspace(1)* %value, i32 addrspace(1)* %status) { +define amdgpu_cs void @struct_buffer_load_format_i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %value, ptr addrspace(1) %status) { ; CHECK-LABEL: name: struct_buffer_load_format_i32_tfe ; CHECK: bb.1 (%ir-block.0): ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3 @@ -318,10 +318,10 @@ define amdgpu_cs void @struct_buffer_load_format_i32_tfe(<4 x i32> inreg %rsrc, %load = call { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %v = extractvalue { i32, i32 } %load, 0 - store i32 %v, i32 addrspace(1)* %value + store i32 %v, ptr 
addrspace(1) %value %s = extractvalue { i32, i32 } %load, 1 - store i32 %s, i32 addrspace(1)* %status + store i32 %s, ptr addrspace(1) %status ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll index 608746f71e18b..a5f5454cf6107 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll @@ -96,7 +96,7 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b) - store volatile double %result, double* undef + store volatile double %result, ptr undef ret void } @@ -129,7 +129,7 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7) - store volatile double %result, double* undef + store volatile double %result, ptr undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll index c4fa11a15731c..43a0f018dc1cd 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll @@ -41,7 +41,7 @@ define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i3 ret i64 %bfe_i32 } -define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 { +define amdgpu_kernel void @bfe_u32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 { ; GFX6-LABEL: bfe_u32_arg_arg_arg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -56,11 +56,11 @@ define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { +define amdgpu_kernel void @bfe_u32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_u32_arg_arg_imm: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -74,11 +74,11 @@ define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 { +define amdgpu_kernel void @bfe_u32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 { ; GFX6-LABEL: bfe_u32_arg_imm_arg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -92,11 +92,11 @@ define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } 
-define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 { +define amdgpu_kernel void @bfe_u32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 { ; GFX6-LABEL: bfe_u32_imm_arg_arg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -111,11 +111,11 @@ define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { +define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_u32_arg_0_width_reg_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -128,11 +128,11 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 { +define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 { ; GFX6-LABEL: bfe_u32_arg_0_width_imm_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 @@ -145,11 +145,11 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_zextload_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_zextload_i8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -163,15 +163,15 @@ define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrsp ; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 8 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %in + %load = load i8, ptr addrspace(1) %in %ext = zext i8 %load to i32 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } ; FIXME: Should be using s_add_i32 -define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -186,15 +186,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 ad ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %in, align 4 + %load = load i32, ptr addrspace(1) %in, align 4 %add = add i32 %load, 1 %ext = and i32 %add, 255 %bfe = call i32 
@llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -209,15 +209,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 a ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %in, align 4 + %load = load i32, ptr addrspace(1) %in, align 4 %add = add i32 %load, 1 %ext = and i32 %add, 65535 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_1: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -232,15 +232,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %ou ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %in, align 4 + %load = load i32, ptr addrspace(1) %in, align 4 %add = add i32 %load, 1 %ext = and i32 %add, 255 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_3: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -255,15 +255,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %ou ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %in, align 4 + %load = load i32, ptr addrspace(1) %in, align 4 %add = add i32 %load, 1 %ext = and i32 %add, 255 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_7: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -278,15 +278,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %ou ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %in, align 4 + %load = load i32, ptr addrspace(1) %in, align 4 %add = add i32 %load, 1 %ext = and i32 %add, 255 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 
%bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_zext_in_reg_i16_offset_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -301,15 +301,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %o ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %load = load i32, i32 addrspace(1)* %in, align 4 + %load = load i32, ptr addrspace(1) %in, align 4 %add = add i32 %load, 1 %ext = and i32 %add, 65535 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_1: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -322,13 +322,13 @@ define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_2: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -342,14 +342,14 @@ define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_3: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -363,14 +363,14 @@ define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_4: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -384,15 +384,15 @@ define 
amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %shr = lshr i32 %shl, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_5(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_5: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -406,15 +406,15 @@ define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %shr = ashr i32 %shl, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_6: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -428,14 +428,14 @@ define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_7: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -449,14 +449,14 @@ define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -470,14 +470,14 @@ define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1) - store i32 
%bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_9: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -490,13 +490,13 @@ define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace( ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_10: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -509,13 +509,13 @@ define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_11: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -528,13 +528,13 @@ define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_12: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -547,14 +547,14 @@ define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } ; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}} -define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_13: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -568,13 +568,13 @@ define amdgpu_kernel void @bfe_u32_test_13(i32 
addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = ashr i32 %x, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @bfe_u32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX6-LABEL: bfe_u32_test_14: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -588,13 +588,13 @@ define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm - %x = load i32, i32 addrspace(1)* %in, align 4 + %x = load i32, ptr addrspace(1) %in, align 4 %shl = lshr i32 %x, 31 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_0: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -606,11 +606,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_1: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -622,11 +622,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_2: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -638,11 +638,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_3: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -654,11 +654,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) ; GFX6-NEXT: 
buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_4: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -670,11 +670,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_5: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -687,11 +687,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_6: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -704,11 +704,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_7: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -721,11 +721,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_8: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -738,11 +738,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void 
@bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_9: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -755,11 +755,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_10: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -772,11 +772,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_11: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -789,11 +789,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_12: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -806,11 +806,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_13: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -823,11 +823,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_14: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -840,11 +840,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; 
GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_15: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -857,11 +857,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_16: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -873,11 +873,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_17: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -890,11 +890,11 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 { +define amdgpu_kernel void @bfe_u32_constant_fold_test_18(ptr addrspace(1) %out) #0 { ; GFX6-LABEL: bfe_u32_constant_fold_test_18: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -907,7 +907,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) ; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX6-NEXT: s_endpgm %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1) - store i32 %bfe_u32, i32 addrspace(1)* %out, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out, align 4 ret void } @@ -915,7 +915,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) ; reduced to the bits demanded by the bfe. ; XXX: The operand to v_bfe_u32 could also just directly be the load register. 
-define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0, +define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(ptr addrspace(1) %out0, ; GFX6-LABEL: simplify_bfe_u32_multi_use_arg: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x4 @@ -934,17 +934,17 @@ define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out ; GFX6-NEXT: s_mov_b64 s[4:5], s[2:3] ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_endpgm - i32 addrspace(1)* %out1, - i32 addrspace(1)* %in) #0 { - %src = load i32, i32 addrspace(1)* %in, align 4 + ptr addrspace(1) %out1, + ptr addrspace(1) %in) #0 { + %src = load i32, ptr addrspace(1) %in, align 4 %and = and i32 %src, 63 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2) - store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4 - store i32 %and, i32 addrspace(1)* %out1, align 4 + store i32 %bfe_u32, ptr addrspace(1) %out0, align 4 + store i32 %and, ptr addrspace(1) %out1, align 4 ret void } -define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 { +define amdgpu_kernel void @lshr_and(ptr addrspace(1) %out, i32 %a) #0 { ; GFX6-LABEL: lshr_and: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 @@ -958,11 +958,11 @@ define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-NEXT: s_endpgm %b = lshr i32 %a, 6 %c = and i32 %b, 7 - store i32 %c, i32 addrspace(1)* %out, align 8 + store i32 %c, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @v_lshr_and(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { ; GFX6-LABEL: v_lshr_and: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -976,11 +976,11 @@ define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 ; GFX6-NEXT: s_endpgm %c = lshr i32 %a, %b %d = and i32 %c, 7 - store i32 %d, i32 addrspace(1)* %out, align 8 + store i32 %d, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 { +define amdgpu_kernel void @and_lshr(ptr addrspace(1) %out, i32 %a) #0 { ; GFX6-LABEL: and_lshr: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 @@ -994,11 +994,11 @@ define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-NEXT: s_endpgm %b = and i32 %a, 448 %c = lshr i32 %b, 6 - store i32 %c, i32 addrspace(1)* %out, align 8 + store i32 %c, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 { +define amdgpu_kernel void @and_lshr2(ptr addrspace(1) %out, i32 %a) #0 { ; GFX6-LABEL: and_lshr2: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 @@ -1012,11 +1012,11 @@ define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-NEXT: s_endpgm %b = and i32 %a, 511 %c = lshr i32 %b, 6 - store i32 %c, i32 addrspace(1)* %out, align 8 + store i32 %c, ptr addrspace(1) %out, align 8 ret void } -define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 { +define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) #0 { ; GFX6-LABEL: shl_lshr: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x2 @@ -1030,7 +1030,7 @@ define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 { ; GFX6-NEXT: s_endpgm %b = shl i32 %a, 9 %c = lshr i32 %b, 11 - store i32 %c, i32 addrspace(1)* %out, align 8 + store i32 %c, ptr addrspace(1) %out, align 8 ret void } diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll index fc4f7f7d84a42..89f1c0b0f2a1d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s -define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) { +define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in1, i32 %in2) { ; GFX8-LABEL: dpp_test: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -39,10 +39,10 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false) - store i32 %tmp0, i32 addrspace(1)* %out + store i32 %tmp0, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @update_dpp64_test(i64 addrspace(1)* %arg, i64 %in1, i64 %in2) { +define amdgpu_kernel void @update_dpp64_test(ptr addrspace(1) %arg, i64 %in1, i64 %in2) { ; GFX8-LABEL: update_dpp64_test: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -89,10 +89,10 @@ define amdgpu_kernel void @update_dpp64_test(i64 addrspace(1)* %arg, i64 %in1, i ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %id - %load = load i64, i64 addrspace(1)* %gep + %gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id + %load = load i64, ptr addrspace(1) %gep %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 1, i32 1, i32 1, i1 false) #1 - store i64 %tmp0, i64 addrspace(1)* %gep + store i64 %tmp0, ptr addrspace(1) %gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll index 65384954a58a2..9e0c6024837d1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll @@ -10,7 +10,7 @@ declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 immarg, <2 x i32>, i1 im ; @llvm.amdgcn.wmma.f32.16x16x16.f16 -define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <8 x float> %C, <8 x float> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <8 x float> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_f32_16x16x16_f16: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_f32_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] @@ -21,13 +21,13 @@ define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B ; W32-NEXT: s_endpgm bb: %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<16 x half> %A, <16 x half> %B, <8 x float> %C) - store <8 x float> %res, <8 x float> addrspace(1)* %out, align 32 + store <8 x float> %res, ptr addrspace(1) %out, align 32 ret void } ; @llvm.amdgcn.wmma.f32.16x16x16.bf16 -define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, <8 x float> %C, <8 x float> addrspace(1)* %out) { +define 
amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, <8 x float> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_f32_16x16x16_bf16: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_f32_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23] @@ -38,13 +38,13 @@ define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, ; W32-NEXT: s_endpgm bb: %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <8 x float> %C) - store <8 x float> %res, <8 x float> addrspace(1)* %out, align 32 + store <8 x float> %res, ptr addrspace(1) %out, align 32 ret void } ; @llvm.amdgcn.wmma.f16.16x16x16.f16 -define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> %B, <16 x half> %C, <16 x half> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> %B, <16 x half> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_f16_16x16x16_f16_lo: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_f16_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] @@ -55,11 +55,11 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> ; W32-NEXT: s_endpgm bb: %res = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A, <16 x half> %B, <16 x half> %C, i1 0) - store <16 x half> %res, <16 x half> addrspace(1)* %out, align 32 + store <16 x half> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> %B, <16 x half> %C, <16 x half> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> %B, <16 x half> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_f16_16x16x16_f16_hi: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_f16_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] op_sel:[0,0,1] @@ -70,13 +70,13 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> ; W32-NEXT: s_endpgm bb: %res = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A, <16 x half> %B, <16 x half> %C, i1 1) - store <16 x half> %res, <16 x half> addrspace(1)* %out, align 32 + store <16 x half> %res, ptr addrspace(1) %out, align 32 ret void } ; @llvm.amdgcn.wmma.bf16.16x16x16.bf16 -define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, <16 x i16> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_bf16_16x16x16_bf16_lo: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23] @@ -87,11 +87,11 @@ define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> ; W32-NEXT: s_endpgm bb: %res = call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, i1 0) - store <16 x i16> %res, <16 x i16> addrspace(1)* %out, align 32 + store <16 x i16> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, <16 x i16> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_bf16_16x16x16_bf16_hi: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23] op_sel:[0,0,1] @@ -102,13 +102,13 @@ define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> 
; W32-NEXT: s_endpgm bb: %res = call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, i1 1) - store <16 x i16> %res, <16 x i16> addrspace(1)* %out, align 32 + store <16 x i16> %res, ptr addrspace(1) %out, align 32 ret void } ; @llvm.amdgcn.wmma.i32.16x16x16.iu8 -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_unsigned: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] @@ -119,11 +119,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_signed: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0] @@ -134,11 +134,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_signed_unsigned: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0] @@ -149,11 +149,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_signed_signed: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,1,0] @@ -164,11 +164,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define 
amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] clamp @@ -179,11 +179,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0] clamp @@ -194,11 +194,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0] clamp @@ -209,11 +209,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_signed_signed_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,1,0] clamp @@ -224,13 +224,13 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } ; @llvm.amdgcn.wmma.i32.16x16x16.iu4 -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void 
@test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_unsigned: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] @@ -241,11 +241,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_signed: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] @@ -256,11 +256,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_signed_unsigned: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] @@ -271,11 +271,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_signed_signed: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,1,0] @@ -286,12 +286,12 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] clamp @@ -302,11 +302,11 @@ define 
amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] clamp @@ -317,11 +317,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] clamp @@ -332,11 +332,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_signed_signed_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,1,0] clamp @@ -347,7 +347,7 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll index 51b5d6c464a2b..a12f13564fe99 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll @@ -10,7 +10,7 @@ declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 immarg, <2 x i32>, i1 im ; @llvm.amdgcn.wmma.f32.16x16x16.f16 -define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <4 x float> %C, <4 x float> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <4 x float> %C, ptr addrspace(1) %out) { ; 
W64-LABEL: test_wmma_f32_16x16x16_f16: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] @@ -19,13 +19,13 @@ define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B ; W64-NEXT: s_endpgm bb: %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<16 x half> %A, <16 x half> %B, <4 x float> %C) - store <4 x float> %res, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %res, ptr addrspace(1) %out, align 16 ret void } ; @llvm.amdgcn.wmma.f32.16x16x16.bf16 -define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, <4 x float> %C, <4 x float> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, <4 x float> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_f32_16x16x16_bf16: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_f32_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] @@ -34,13 +34,13 @@ define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, ; W64-NEXT: s_endpgm bb: %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <4 x float> %C) - store <4 x float> %res, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %res, ptr addrspace(1) %out, align 16 ret void } ; @llvm.amdgcn.wmma.f16.16x16x16.f16 -define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> %B, <8 x half> %C, <8 x half> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> %B, <8 x half> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_f16_16x16x16_f16_lo: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] @@ -49,11 +49,11 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> ; W64-NEXT: s_endpgm bb: %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A, <16 x half> %B, <8 x half> %C, i1 0) - store <8 x half> %res, <8 x half> addrspace(1)* %out, align 16 + store <8 x half> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> %B, <8 x half> %C, <8 x half> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> %B, <8 x half> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_f16_16x16x16_f16_hi: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] op_sel:[0,0,1] @@ -62,13 +62,13 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> ; W64-NEXT: s_endpgm bb: %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A, <16 x half> %B, <8 x half> %C, i1 1) - store <8 x half> %res, <8 x half> addrspace(1)* %out, align 16 + store <8 x half> %res, ptr addrspace(1) %out, align 16 ret void } ; @llvm.amdgcn.wmma.bf16.16x16x16.bf16 -define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, <8 x i16> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_bf16_16x16x16_bf16_lo: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] @@ -77,11 +77,11 @@ define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> ; W64-NEXT: s_endpgm bb: %res = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, 
i1 0) - store <8 x i16> %res, <8 x i16> addrspace(1)* %out, align 16 + store <8 x i16> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, <8 x i16> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_bf16_16x16x16_bf16_hi: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] op_sel:[0,0,1] @@ -90,13 +90,13 @@ define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> ; W64-NEXT: s_endpgm bb: %res = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, i1 1) - store <8 x i16> %res, <8 x i16> addrspace(1)* %out, align 16 + store <8 x i16> %res, ptr addrspace(1) %out, align 16 ret void } ; @llvm.amdgcn.wmma.i32.16x16x16.iu8 -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_unsigned: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] @@ -105,12 +105,12 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 0, <4 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_signed: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] @@ -119,11 +119,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 1, <4 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_signed_unsigned: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] @@ -132,11 +132,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 0, <4 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void 
@test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_signed_signed: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,1,0] @@ -145,11 +145,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 1, <4 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] clamp @@ -158,11 +158,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 0, <4 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] clamp @@ -171,11 +171,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 1, <4 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] clamp @@ -184,11 +184,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 0, <4 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_signed_signed_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 
v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,1,0] clamp @@ -197,13 +197,13 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 1, <4 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } ; @llvm.amdgcn.wmma.i32.16x16x16.iu4 -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_unsigned: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] @@ -212,11 +212,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_signed: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] @@ -225,11 +225,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_signed_unsigned: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] @@ -238,11 +238,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_signed_signed: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,1,0] @@ -251,11 +251,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 1, <2 x i32> %B, <4 x i32> 
%C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] clamp @@ -264,11 +264,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] clamp @@ -277,11 +277,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] clamp @@ -290,11 +290,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_signed_signed_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,1,0] clamp @@ -303,7 +303,7 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 1, <2 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll index 3af3c59c0092a..148a35fb98cac 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll @@ -34,9 +34,9 @@ declare i32 @llvm.amdgcn.workgroup.id.z() #0 ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define amdgpu_kernel void @test_workgroup_id_x(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workgroup.id.x() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } @@ -61,9 +61,9 @@ define amdgpu_kernel void @test_workgroup_id_x(i32 addrspace(1)* %out) #1 { ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define amdgpu_kernel void @test_workgroup_id_y(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workgroup.id.y() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } @@ -96,9 +96,9 @@ define amdgpu_kernel void @test_workgroup_id_y(i32 addrspace(1)* %out) #1 { ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define amdgpu_kernel void @test_workgroup_id_z(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workgroup_id_z(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workgroup.id.z() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll index fa246079e7d28..0ee242bd5c6c3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll @@ -22,9 +22,9 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0 ; ALL: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}v0 ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 0 -define amdgpu_kernel void @test_workitem_id_x(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workitem_id_x(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.x() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } @@ -40,9 +40,9 @@ define amdgpu_kernel void @test_workitem_id_x(i32 addrspace(1)* %out) #1 { ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10 ; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]] ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 1 -define amdgpu_kernel void @test_workitem_id_y(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workitem_id_y(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.y() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } @@ -58,9 +58,9 @@ define amdgpu_kernel void @test_workitem_id_y(i32 addrspace(1)* %out) #1 { ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10 ; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]] ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 2 -define amdgpu_kernel void @test_workitem_id_z(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workitem_id_z(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.z() - store i32 %id, i32 addrspace(1)* %out + store i32 
%id, ptr addrspace(1) %out ret void } @@ -69,12 +69,12 @@ define amdgpu_kernel void @test_workitem_id_z(i32 addrspace(1)* %out) #1 { ; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0 ; ALL-NOT: v0 ; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0 -define amdgpu_kernel void @test_workitem_id_x_usex2(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workitem_id_x_usex2(ptr addrspace(1) %out) #1 { %id0 = call i32 @llvm.amdgcn.workitem.id.x() - store volatile i32 %id0, i32 addrspace(1)* %out + store volatile i32 %id0, ptr addrspace(1) %out %id1 = call i32 @llvm.amdgcn.workitem.id.x() - store volatile i32 %id1, i32 addrspace(1)* %out + store volatile i32 %id1, ptr addrspace(1) %out ret void } @@ -83,15 +83,15 @@ define amdgpu_kernel void @test_workitem_id_x_usex2(i32 addrspace(1)* %out) #1 { ; ALL: {{flat|global}}_store_{{dword|b32}} ; ALL-NOT: v0 ; ALL: {{flat|global}}_store_{{dword|b32}} v{{.*}}, v0 -define amdgpu_kernel void @test_workitem_id_x_use_outside_entry(i32 addrspace(1)* %out, i32 %arg) #1 { +define amdgpu_kernel void @test_workitem_id_x_use_outside_entry(ptr addrspace(1) %out, i32 %arg) #1 { bb0: - store volatile i32 0, i32 addrspace(1)* %out + store volatile i32 0, ptr addrspace(1) %out %cond = icmp eq i32 %arg, 0 br i1 %cond, label %bb1, label %bb2 bb1: %id = call i32 @llvm.amdgcn.workitem.id.x() - store volatile i32 %id, i32 addrspace(1)* %out + store volatile i32 %id, ptr addrspace(1) %out br label %bb2 bb2: @@ -102,27 +102,27 @@ bb2: ; ALL: s_waitcnt ; HSA-NEXT: v_and_b32_e32 v2, 0x3ff, v31 ; MESA-NEXT: v_and_b32_e32 v2, 0x3ff, v31 -define void @test_workitem_id_x_func(i32 addrspace(1)* %out) #1 { +define void @test_workitem_id_x_func(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.x() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } ; ALL-LABEL: {{^}}test_workitem_id_y_func: ; HSA: v_bfe_u32 v2, v31, 10, 10 ; MESA: v_bfe_u32 v2, v31, 10, 10 -define void @test_workitem_id_y_func(i32 addrspace(1)* %out) #1 { +define void @test_workitem_id_y_func(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.y() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } ; ALL-LABEL: {{^}}test_workitem_id_z_func: ; HSA: v_bfe_u32 v2, v31, 20, 10 ; MESA: v_bfe_u32 v2, v31, 20, 10 -define void @test_workitem_id_z_func(i32 addrspace(1)* %out) #1 { +define void @test_workitem_id_z_func(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.z() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } @@ -139,13 +139,13 @@ define void @test_workitem_id_z_func(i32 addrspace(1)* %out) #1 { ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] -define amdgpu_kernel void @test_reqd_workgroup_size_x_only(i32* %out) !reqd_work_group_size !0 { +define amdgpu_kernel void @test_reqd_workgroup_size_x_only(ptr %out) !reqd_work_group_size !0 { %id.x = call i32 @llvm.amdgcn.workitem.id.x() %id.y = call i32 @llvm.amdgcn.workitem.id.y() %id.z = call i32 @llvm.amdgcn.workitem.id.z() - store volatile i32 %id.x, i32* %out - store volatile i32 %id.y, i32* %out - store volatile i32 %id.z, i32* %out + store volatile i32 %id.x, ptr %out + store volatile i32 %id.y, ptr %out + store volatile i32 %id.z, ptr %out ret void } @@ -161,13 +161,13 @@ define amdgpu_kernel void @test_reqd_workgroup_size_x_only(i32* %out) !reqd_work ; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, 
[[MASKED]] ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] -define amdgpu_kernel void @test_reqd_workgroup_size_y_only(i32* %out) !reqd_work_group_size !1 { +define amdgpu_kernel void @test_reqd_workgroup_size_y_only(ptr %out) !reqd_work_group_size !1 { %id.x = call i32 @llvm.amdgcn.workitem.id.x() %id.y = call i32 @llvm.amdgcn.workitem.id.y() %id.z = call i32 @llvm.amdgcn.workitem.id.z() - store volatile i32 %id.x, i32* %out - store volatile i32 %id.y, i32* %out - store volatile i32 %id.z, i32* %out + store volatile i32 %id.x, ptr %out + store volatile i32 %id.y, ptr %out + store volatile i32 %id.z, ptr %out ret void } @@ -182,13 +182,13 @@ define amdgpu_kernel void @test_reqd_workgroup_size_y_only(i32* %out) !reqd_work ; PACKED: v_bfe_u32 [[MASKED:v[0-9]+]], v0, 10, 20 ; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] -define amdgpu_kernel void @test_reqd_workgroup_size_z_only(i32* %out) !reqd_work_group_size !2 { +define amdgpu_kernel void @test_reqd_workgroup_size_z_only(ptr %out) !reqd_work_group_size !2 { %id.x = call i32 @llvm.amdgcn.workitem.id.x() %id.y = call i32 @llvm.amdgcn.workitem.id.y() %id.z = call i32 @llvm.amdgcn.workitem.id.z() - store volatile i32 %id.x, i32* %out - store volatile i32 %id.y, i32* %out - store volatile i32 %id.z, i32* %out + store volatile i32 %id.x, ptr %out + store volatile i32 %id.y, ptr %out + store volatile i32 %id.z, ptr %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll index 8ff79e1184ce4..49502b3435609 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll @@ -2,9 +2,9 @@ ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=GCN %s ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=GCN %s -declare void @llvm.memcpy.inline.p1i8.p1i8.i32(i8 addrspace(1)*, i8 addrspace(1)*, i32, i1 immarg) +declare void @llvm.memcpy.inline.p1.p1.i32(ptr addrspace(1), ptr addrspace(1), i32, i1 immarg) -define amdgpu_cs void @test(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) { +define amdgpu_cs void @test(ptr addrspace(1) %dst, ptr addrspace(1) %src) { ; GCN-LABEL: test: ; GCN: ; %bb.0: ; GCN-NEXT: s_mov_b32 s2, 0 @@ -25,6 +25,6 @@ define amdgpu_cs void @test(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) { ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:3 ; GCN-NEXT: s_endpgm - call void @llvm.memcpy.inline.p1i8.p1i8.i32(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 4, i1 false) + call void @llvm.memcpy.inline.p1.p1.i32(ptr addrspace(1) %dst, ptr addrspace(1) %src, i32 4, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll index 52163982d1e2d..ef8db23aaa672 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll @@ -2,9 +2,9 @@ ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=19 %s -o - | FileCheck -check-prefix=LOOP %s ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=21 %s -o - | FileCheck -check-prefix=UNROLL %s -declare void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)*, i8 addrspace(1)*, i32, i1 immarg) +declare void 
@llvm.memcpy.p1.p1.i32(ptr addrspace(1), ptr addrspace(1), i32, i1 immarg) -define amdgpu_cs void @memcpy_p1i8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) { +define amdgpu_cs void @memcpy_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src) { ; LOOP-LABEL: memcpy_p1i8: ; LOOP: ; %bb.0: ; LOOP-NEXT: s_mov_b32 s2, 0 @@ -225,7 +225,7 @@ define amdgpu_cs void @memcpy_p1i8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) ; UNROLL-NEXT: s_waitcnt vmcnt(0) ; UNROLL-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:19 ; UNROLL-NEXT: s_endpgm - call void @llvm.memcpy.p1i8.p1i8.i32(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 20, i1 false) + call void @llvm.memcpy.p1.p1.i32(ptr addrspace(1) %dst, ptr addrspace(1) %src, i32 20, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll index a1e40dd5da91d..c7a1e163c04aa 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll @@ -2,9 +2,9 @@ ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=LOOP %s ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=UNROLL %s -declare void @llvm.memmove.p1i8.p1i8.i32(i8 addrspace(1)*, i8 addrspace(1)*, i32, i1) +declare void @llvm.memmove.p1.p1.i32(ptr addrspace(1), ptr addrspace(1), i32, i1) -define amdgpu_cs void @memmove_p1i8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src) { +define amdgpu_cs void @memmove_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src) { ; LOOP-LABEL: memmove_p1i8: ; LOOP: ; %bb.0: ; LOOP-NEXT: v_cmp_ge_u64_e32 vcc, v[2:3], v[0:1] @@ -76,6 +76,6 @@ define amdgpu_cs void @memmove_p1i8(i8 addrspace(1)* %dst, i8 addrspace(1)* %src ; UNROLL-NEXT: s_waitcnt vmcnt(3) ; UNROLL-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:3 ; UNROLL-NEXT: s_endpgm - call void @llvm.memmove.p1i8.p1i8.i32(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i32 4, i1 false) + call void @llvm.memmove.p1.p1.i32(ptr addrspace(1) %dst, ptr addrspace(1) %src, i32 4, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll index cdc59777252ff..dd3f9f12111dc 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll @@ -2,9 +2,9 @@ ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=LOOP %s ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -amdgpu-mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=UNROLL %s -declare void @llvm.memset.p1i8.i32(i8 addrspace(1)*, i8, i32, i1) +declare void @llvm.memset.p1.i32(ptr addrspace(1), i8, i32, i1) -define amdgpu_cs void @memset_p1i8(i8 addrspace(1)* %dst, i8 %val) { +define amdgpu_cs void @memset_p1i8(ptr addrspace(1) %dst, i8 %val) { ; LOOP-LABEL: memset_p1i8: ; LOOP: ; %bb.0: ; %loadstoreloop.preheader ; LOOP-NEXT: s_mov_b64 s[0:1], 0 @@ -34,6 +34,6 @@ define amdgpu_cs void @memset_p1i8(i8 addrspace(1)* %dst, i8 %val) { ; UNROLL-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:2 ; UNROLL-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64 offset:3 ; UNROLL-NEXT: s_endpgm - call void @llvm.memset.p1i8.i32(i8 addrspace(1)* %dst, i8 %val, i32 4, i1 false) + call void @llvm.memset.p1.i32(ptr addrspace(1) %dst, i8 %val, i32 
4, i1 false) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll index 98bc9815f001d..469e6ddad0a07 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll @@ -7,7 +7,7 @@ ; FIXME: ; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s -define <3 x i32> @v_load_constant_v3i32_align1(<3 x i32> addrspace(4)* %ptr) { +define <3 x i32> @v_load_constant_v3i32_align1(ptr addrspace(4) %ptr) { ; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align1: ; GFX9-UNALIGNED: ; %bb.0: ; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -112,11 +112,11 @@ define <3 x i32> @v_load_constant_v3i32_align1(<3 x i32> addrspace(4)* %ptr) { ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v1, v4, v3 ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v6, v5 ; GFX7-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 1 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 1 ret <3 x i32> %load } -define <3 x i32> @v_load_constant_v3i32_align2(<3 x i32> addrspace(4)* %ptr) { +define <3 x i32> @v_load_constant_v3i32_align2(ptr addrspace(4) %ptr) { ; GFX9-UNALIGNED-LABEL: v_load_constant_v3i32_align2: ; GFX9-UNALIGNED: ; %bb.0: ; GFX9-UNALIGNED-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -176,11 +176,11 @@ define <3 x i32> @v_load_constant_v3i32_align2(<3 x i32> addrspace(4)* %ptr) { ; GFX7-NOUNALIGNED-NEXT: s_waitcnt vmcnt(0) ; GFX7-NOUNALIGNED-NEXT: v_or_b32_e32 v2, v2, v7 ; GFX7-NOUNALIGNED-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 2 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 2 ret <3 x i32> %load } -define <3 x i32> @v_load_constant_v3i32_align4(<3 x i32> addrspace(4)* %ptr) { +define <3 x i32> @v_load_constant_v3i32_align4(ptr addrspace(4) %ptr) { ; GFX9-LABEL: v_load_constant_v3i32_align4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -197,11 +197,11 @@ define <3 x i32> @v_load_constant_v3i32_align4(<3 x i32> addrspace(4)* %ptr) { ; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 4 ret <3 x i32> %load } -define i96 @v_load_constant_i96_align8(i96 addrspace(4)* %ptr) { +define i96 @v_load_constant_i96_align8(ptr addrspace(4) %ptr) { ; GFX9-LABEL: v_load_constant_i96_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -218,11 +218,11 @@ define i96 @v_load_constant_i96_align8(i96 addrspace(4)* %ptr) { ; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %load = load i96, i96 addrspace(4)* %ptr, align 8 + %load = load i96, ptr addrspace(4) %ptr, align 8 ret i96 %load } -define <3 x i32> @v_load_constant_v3i32_align8(<3 x i32> addrspace(4)* %ptr) { +define <3 x i32> @v_load_constant_v3i32_align8(ptr addrspace(4) %ptr) { ; GFX9-LABEL: v_load_constant_v3i32_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -239,11 +239,11 @@ define <3 x i32> @v_load_constant_v3i32_align8(<3 x i32> addrspace(4)* %ptr) { ; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %load = load 
<3 x i32>, <3 x i32> addrspace(4)* %ptr, align 8 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 8 ret <3 x i32> %load } -define <6 x i16> @v_load_constant_v6i16_align8(<6 x i16> addrspace(4)* %ptr) { +define <6 x i16> @v_load_constant_v6i16_align8(ptr addrspace(4) %ptr) { ; GFX9-LABEL: v_load_constant_v6i16_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -266,11 +266,11 @@ define <6 x i16> @v_load_constant_v6i16_align8(<6 x i16> addrspace(4)* %ptr) { ; GFX7-NEXT: v_mov_b32_e32 v2, v7 ; GFX7-NEXT: v_mov_b32_e32 v4, v8 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %load = load <6 x i16>, <6 x i16> addrspace(4)* %ptr, align 8 + %load = load <6 x i16>, ptr addrspace(4) %ptr, align 8 ret <6 x i16> %load } -define <12 x i8> @v_load_constant_v12i8_align8(<12 x i8> addrspace(4)* %ptr) { +define <12 x i8> @v_load_constant_v12i8_align8(ptr addrspace(4) %ptr) { ; GFX9-LABEL: v_load_constant_v12i8_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -313,11 +313,11 @@ define <12 x i8> @v_load_constant_v12i8_align8(<12 x i8> addrspace(4)* %ptr) { ; GFX7-NEXT: v_mov_b32_e32 v1, v13 ; GFX7-NEXT: v_mov_b32_e32 v2, v12 ; GFX7-NEXT: s_setpc_b64 s[30:31] - %load = load <12 x i8>, <12 x i8> addrspace(4)* %ptr, align 8 + %load = load <12 x i8>, ptr addrspace(4) %ptr, align 8 ret <12 x i8> %load } -define <3 x i32> @v_load_constant_v3i32_align16(<3 x i32> addrspace(4)* %ptr) { +define <3 x i32> @v_load_constant_v3i32_align16(ptr addrspace(4) %ptr) { ; GFX9-LABEL: v_load_constant_v3i32_align16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -334,11 +334,11 @@ define <3 x i32> @v_load_constant_v3i32_align16(<3 x i32> addrspace(4)* %ptr) { ; GFX7-NEXT: buffer_load_dwordx3 v[0:2], v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 16 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 16 ret <3 x i32> %load } -define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(<3 x i32> addrspace(4)* inreg %ptr) { +define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(ptr addrspace(4) inreg %ptr) { ; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align1: ; GFX9-UNALIGNED: ; %bb.0: ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0 @@ -451,11 +451,11 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align1(<3 x i32> addrspace(4)* ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 ; GFX7-NOUNALIGNED-NEXT: ; return to shader part epilog - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 1 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 1 ret <3 x i32> %load } -define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(<3 x i32> addrspace(4)* inreg %ptr) { +define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(ptr addrspace(4) inreg %ptr) { ; GFX9-UNALIGNED-LABEL: s_load_constant_v3i32_align2: ; GFX9-UNALIGNED: ; %bb.0: ; GFX9-UNALIGNED-NEXT: v_mov_b32_e32 v0, 0 @@ -523,11 +523,11 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align2(<3 x i32> addrspace(4)* ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s1, v1 ; GFX7-NOUNALIGNED-NEXT: v_readfirstlane_b32 s2, v2 ; GFX7-NOUNALIGNED-NEXT: ; return to shader part epilog - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 2 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 2 ret <3 x i32> %load } -define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* inreg %ptr) { +define amdgpu_ps <3 x 
i32> @s_load_constant_v3i32_align4(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: s_load_constant_v3i32_align4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 @@ -545,11 +545,11 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align4(<3 x i32> addrspace(4)* ; GFX7-NEXT: s_mov_b32 s0, s4 ; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 4 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 4 ret <3 x i32> %load } -define amdgpu_ps i96 @s_load_constant_i96_align8(i96 addrspace(4)* inreg %ptr) { +define amdgpu_ps i96 @s_load_constant_i96_align8(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: s_load_constant_i96_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 @@ -567,11 +567,11 @@ define amdgpu_ps i96 @s_load_constant_i96_align8(i96 addrspace(4)* inreg %ptr) { ; GFX7-NEXT: s_mov_b32 s0, s4 ; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog - %load = load i96, i96 addrspace(4)* %ptr, align 8 + %load = load i96, ptr addrspace(4) %ptr, align 8 ret i96 %load } -define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(<3 x i32> addrspace(4)* inreg %ptr) { +define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: s_load_constant_v3i32_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 @@ -589,11 +589,11 @@ define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align8(<3 x i32> addrspace(4)* ; GFX7-NEXT: s_mov_b32 s0, s4 ; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 8 + %load = load <3 x i32>, ptr addrspace(4) %ptr, align 8 ret <3 x i32> %load } -define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(<6 x i16> addrspace(4)* inreg %ptr) { +define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: s_load_constant_v6i16_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 @@ -611,12 +611,12 @@ define amdgpu_ps <3 x i32> @s_load_constant_v6i16_align8(<6 x i16> addrspace(4)* ; GFX7-NEXT: s_mov_b32 s0, s4 ; GFX7-NEXT: s_mov_b32 s1, s5 ; GFX7-NEXT: ; return to shader part epilog - %load = load <6 x i16>, <6 x i16> addrspace(4)* %ptr, align 8 + %load = load <6 x i16>, ptr addrspace(4) %ptr, align 8 %cast = bitcast <6 x i16> %load to <3 x i32> ret <3 x i32> %cast } -define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(<12 x i8> addrspace(4)* inreg %ptr) { +define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(ptr addrspace(4) inreg %ptr) { ; GFX9-LABEL: s_load_constant_v12i8_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0 @@ -652,16 +652,16 @@ define amdgpu_ps <12 x i8> @s_load_constant_v12i8_align8(<12 x i8> addrspace(4)* ; GFX7-NEXT: s_mov_b32 s0, s12 ; GFX7-NEXT: s_mov_b32 s4, s13 ; GFX7-NEXT: ; return to shader part epilog - %load = load <12 x i8>, <12 x i8> addrspace(4)* %ptr, align 8 + %load = load <12 x i8>, ptr addrspace(4) %ptr, align 8 ret <12 x i8> %load } -define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align16(<3 x i32> addrspace(4)* inreg %ptr) { +define amdgpu_ps <3 x i32> @s_load_constant_v3i32_align16(ptr addrspace(4) inreg %ptr) { ; GCN-LABEL: s_load_constant_v3i32_align16: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ; return to shader part epilog - %load = load <3 x i32>, <3 x i32> addrspace(4)* %ptr, align 16 + %load = load <3 
x i32>, ptr addrspace(4) %ptr, align 16 ret <3 x i32> %load } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll index f0ed8f07c98e2..44e4320cddb22 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant32bit.ll @@ -35,8 +35,8 @@ define amdgpu_ps float @load_constant32bit_vgpr_offset(i32 %arg) { ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog entry: - %gep = getelementptr <{ [4294967295 x float] }>, <{ [4294967295 x float] }> addrspace(6)* null, i32 0, i32 0, i32 %arg - %load = load float, float addrspace(6)* %gep, align 4 + %gep = getelementptr <{ [4294967295 x float] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %arg + %load = load float, ptr addrspace(6) %gep, align 4 ret float %load } @@ -49,13 +49,13 @@ define amdgpu_ps i32 @load_constant32bit_sgpr_offset(i32 inreg %arg) { ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: ; return to shader part epilog entry: - %gep = getelementptr <{ [4294967295 x i32] }>, <{ [4294967295 x i32] }> addrspace(6)* null, i32 0, i32 0, i32 %arg - %load = load i32, i32 addrspace(6)* %gep, align 4 + %gep = getelementptr <{ [4294967295 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %arg + %load = load i32, ptr addrspace(6) %gep, align 4 ret i32 %load } ; This gets split during regbankselect -define amdgpu_ps <8 x float> @load_constant32bit_vgpr_v8f32(<8 x float> addrspace(6)* %arg) { +define amdgpu_ps <8 x float> @load_constant32bit_vgpr_v8f32(ptr addrspace(6) %arg) { ; GFX6-LABEL: load_constant32bit_vgpr_v8f32: ; GFX6: ; %bb.0: ; %entry ; GFX6-NEXT: v_mov_b32_e32 v4, v0 @@ -88,6 +88,6 @@ define amdgpu_ps <8 x float> @load_constant32bit_vgpr_v8f32(<8 x float> addrspac ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: ; return to shader part epilog entry: - %load = load <8 x float>, <8 x float> addrspace(6)* %arg, align 32 + %load = load <8 x float>, ptr addrspace(6) %arg, align 32 ret <8 x float> %load } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll index 0d9abad9aa764..6f9045d4cb664 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.128.ll @@ -7,7 +7,7 @@ ; FIXME: ; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s -define <4 x i32> @load_lds_v4i32(<4 x i32> addrspace(3)* %ptr) { +define <4 x i32> @load_lds_v4i32(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v4i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -38,11 +38,11 @@ define <4 x i32> @load_lds_v4i32(<4 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b128 v[0:3], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr + %load = load <4 x i32>, ptr addrspace(3) %ptr ret <4 x i32> %load } -define <4 x i32> @load_lds_v4i32_align1(<4 x i32> addrspace(3)* %ptr) { +define <4 x i32> @load_lds_v4i32_align1(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v4i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -245,11 +245,11 @@ define <4 x i32> @load_lds_v4i32_align1(<4 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) ; GFX11-NEXT: v_or3_b32 v3, v11, v12, v10 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 1 + %load 
= load <4 x i32>, ptr addrspace(3) %ptr, align 1 ret <4 x i32> %load } -define <4 x i32> @load_lds_v4i32_align2(<4 x i32> addrspace(3)* %ptr) { +define <4 x i32> @load_lds_v4i32_align2(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v4i32_align2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -339,11 +339,11 @@ define <4 x i32> @load_lds_v4i32_align2(<4 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_lshl_or_b32 v3, v8, 16, v7 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 2 + %load = load <4 x i32>, ptr addrspace(3) %ptr, align 2 ret <4 x i32> %load } -define <4 x i32> @load_lds_v4i32_align4(<4 x i32> addrspace(3)* %ptr) { +define <4 x i32> @load_lds_v4i32_align4(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v4i32_align4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -382,11 +382,11 @@ define <4 x i32> @load_lds_v4i32_align4(<4 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_2addr_b32 v[2:3], v2 offset0:2 offset1:3 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 4 + %load = load <4 x i32>, ptr addrspace(3) %ptr, align 4 ret <4 x i32> %load } -define <4 x i32> @load_lds_v4i32_align8(<4 x i32> addrspace(3)* %ptr) { +define <4 x i32> @load_lds_v4i32_align8(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v4i32_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -419,11 +419,11 @@ define <4 x i32> @load_lds_v4i32_align8(<4 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_2addr_b64 v[0:3], v0 offset1:1 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 8 + %load = load <4 x i32>, ptr addrspace(3) %ptr, align 8 ret <4 x i32> %load } -define <4 x i32> @load_lds_v4i32_align16(<4 x i32> addrspace(3)* %ptr) { +define <4 x i32> @load_lds_v4i32_align16(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v4i32_align16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -454,6 +454,6 @@ define <4 x i32> @load_lds_v4i32_align16(<4 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b128 v[0:3], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 16 + %load = load <4 x i32>, ptr addrspace(3) %ptr, align 16 ret <4 x i32> %load } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll index 61d7a9f32c7e3..0bb4db73db640 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-local.96.ll @@ -7,7 +7,7 @@ ; FIXME: ; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s -define <3 x i32> @load_lds_v3i32(<3 x i32> addrspace(3)* %ptr) { +define <3 x i32> @load_lds_v3i32(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v3i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -38,11 +38,11 @@ define <3 x i32> @load_lds_v3i32(<3 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b96 v[0:2], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr + %load = load <3 x i32>, ptr addrspace(3) %ptr ret <3 x i32> %load } -define <3 x i32> @load_lds_v3i32_align1(<3 x i32> addrspace(3)* %ptr) { +define <3 x i32> 
@load_lds_v3i32_align1(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v3i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -202,11 +202,11 @@ define <3 x i32> @load_lds_v3i32_align1(<3 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) ; GFX11-NEXT: v_or3_b32 v2, v8, v9, v7 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 1 + %load = load <3 x i32>, ptr addrspace(3) %ptr, align 1 ret <3 x i32> %load } -define <3 x i32> @load_lds_v3i32_align2(<3 x i32> addrspace(3)* %ptr) { +define <3 x i32> @load_lds_v3i32_align2(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v3i32_align2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -280,11 +280,11 @@ define <3 x i32> @load_lds_v3i32_align2(<3 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_lshl_or_b32 v2, v6, 16, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 2 + %load = load <3 x i32>, ptr addrspace(3) %ptr, align 2 ret <3 x i32> %load } -define <3 x i32> @load_lds_v3i32_align4(<3 x i32> addrspace(3)* %ptr) { +define <3 x i32> @load_lds_v3i32_align4(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v3i32_align4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -323,11 +323,11 @@ define <3 x i32> @load_lds_v3i32_align4(<3 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b32 v2, v2 offset:8 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 4 + %load = load <3 x i32>, ptr addrspace(3) %ptr, align 4 ret <3 x i32> %load } -define <3 x i32> @load_lds_v3i32_align8(<3 x i32> addrspace(3)* %ptr) { +define <3 x i32> @load_lds_v3i32_align8(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v3i32_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -366,11 +366,11 @@ define <3 x i32> @load_lds_v3i32_align8(<3 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b32 v2, v2 offset:8 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 8 + %load = load <3 x i32>, ptr addrspace(3) %ptr, align 8 ret <3 x i32> %load } -define <3 x i32> @load_lds_v3i32_align16(<3 x i32> addrspace(3)* %ptr) { +define <3 x i32> @load_lds_v3i32_align16(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v3i32_align16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -401,6 +401,6 @@ define <3 x i32> @load_lds_v3i32_align16(<3 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b96 v[0:2], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 16 + %load = load <3 x i32>, ptr addrspace(3) %ptr, align 16 ret <3 x i32> %load } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll index caaf3d750bba1..0234119c01c67 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-unaligned.ll @@ -8,7 +8,7 @@ ; LDS alignment enforcement is controlled by a configuration register: ; SH_MEM_CONFIG.alignment_mode -define <4 x i32> @load_lds_v4i32_align1(<4 x i32> addrspace(3)* %ptr) { +define <4 x i32> @load_lds_v4i32_align1(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v4i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -89,11 
+89,11 @@ define <4 x i32> @load_lds_v4i32_align1(<4 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b128 v[0:3], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <4 x i32>, <4 x i32> addrspace(3)* %ptr, align 1 + %load = load <4 x i32>, ptr addrspace(3) %ptr, align 1 ret <4 x i32> %load } -define <3 x i32> @load_lds_v3i32_align1(<3 x i32> addrspace(3)* %ptr) { +define <3 x i32> @load_lds_v3i32_align1(ptr addrspace(3) %ptr) { ; GFX9-LABEL: load_lds_v3i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -162,11 +162,11 @@ define <3 x i32> @load_lds_v3i32_align1(<3 x i32> addrspace(3)* %ptr) { ; GFX11-NEXT: ds_load_b96 v[0:2], v0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load <3 x i32>, <3 x i32> addrspace(3)* %ptr, align 1 + %load = load <3 x i32>, ptr addrspace(3) %ptr, align 1 ret <3 x i32> %load } -define void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { +define void @store_lds_v4i32_align1(ptr addrspace(3) %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -225,11 +225,11 @@ define void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x) ; GFX11-NEXT: ds_store_b128 v0, v[1:4] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 1 + store <4 x i32> %x, ptr addrspace(3) %out, align 1 ret void } -define void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { +define void @store_lds_v3i32_align1(ptr addrspace(3) %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -281,11 +281,11 @@ define void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) ; GFX11-NEXT: ds_store_b96 v0, v[1:3] ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 1 + store <3 x i32> %x, ptr addrspace(3) %out, align 1 ret void } -define amdgpu_ps void @test_s_load_constant_v8i32_align1(<8 x i32> addrspace(4)* inreg %ptr, <8 x i32> addrspace(1)* inreg %out) { +define amdgpu_ps void @test_s_load_constant_v8i32_align1(ptr addrspace(4) inreg %ptr, ptr addrspace(1) inreg %out) { ; GFX9-LABEL: test_s_load_constant_v8i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: v_mov_b32_e32 v8, 0 @@ -336,7 +336,7 @@ define amdgpu_ps void @test_s_load_constant_v8i32_align1(<8 x i32> addrspace(4)* ; GFX11-NEXT: global_store_b128 v8, v[4:7], s[2:3] offset:16 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - %load = load <8 x i32>, <8 x i32> addrspace(4)* %ptr, align 1 - store <8 x i32> %load, <8 x i32> addrspace(1)* %out + %load = load <8 x i32>, ptr addrspace(4) %ptr, align 1 + store <8 x i32> %load, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll index 672b4d9445d33..b5fa7de724bf9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll @@ -64,21 +64,21 @@ entry: br i1 %cond, label %bb0, label %bb1 bb0: - store volatile i32 123, i32 addrspace(1)* undef - store volatile i32 456, i32 addrspace(1)* undef - store volatile i32 999, i32 addrspace(1)* undef - store volatile i32 1000, i32 addrspace(1)* undef - store volatile i32 455, i32 addrspace(1)* undef - store volatile i32 
23526, i32 addrspace(1)* undef + store volatile i32 123, ptr addrspace(1) undef + store volatile i32 456, ptr addrspace(1) undef + store volatile i32 999, ptr addrspace(1) undef + store volatile i32 1000, ptr addrspace(1) undef + store volatile i32 455, ptr addrspace(1) undef + store volatile i32 23526, ptr addrspace(1) undef br label %bb2 bb1: - store volatile i32 23526, i32 addrspace(1)* undef - store volatile i32 455, i32 addrspace(1)* undef - store volatile i32 1000, i32 addrspace(1)* undef - store volatile i32 456, i32 addrspace(1)* undef - store volatile i32 999, i32 addrspace(1)* undef - store volatile i32 123, i32 addrspace(1)* undef + store volatile i32 23526, ptr addrspace(1) undef + store volatile i32 455, ptr addrspace(1) undef + store volatile i32 1000, ptr addrspace(1) undef + store volatile i32 456, ptr addrspace(1) undef + store volatile i32 999, ptr addrspace(1) undef + store volatile i32 123, ptr addrspace(1) undef br label %bb2 bb2: @@ -146,13 +146,13 @@ entry: br i1 %cond, label %bb0, label %bb1 bb0: - store volatile i32 0, i32 addrspace(1)* @gv0 - store volatile i32 1, i32 addrspace(1)* @gv1 + store volatile i32 0, ptr addrspace(1) @gv0 + store volatile i32 1, ptr addrspace(1) @gv1 br label %bb2 bb1: - store volatile i32 0, i32 addrspace(1)* @gv2 - store volatile i32 1, i32 addrspace(1)* @gv3 + store volatile i32 0, ptr addrspace(1) @gv2 + store volatile i32 1, ptr addrspace(1) @gv3 br label %bb2 bb2: @@ -214,13 +214,13 @@ entry: br i1 %cond, label %bb0, label %bb1 bb0: - store volatile i32 0, i32 addrspace(1)* @static.gv0 - store volatile i32 1, i32 addrspace(1)* @static.gv1 + store volatile i32 0, ptr addrspace(1) @static.gv0 + store volatile i32 1, ptr addrspace(1) @static.gv1 br label %bb2 bb1: - store volatile i32 0, i32 addrspace(1)* @static.gv2 - store volatile i32 1, i32 addrspace(1)* @static.gv3 + store volatile i32 0, ptr addrspace(1) @static.gv2 + store volatile i32 1, ptr addrspace(1) @static.gv3 br label %bb2 bb2: @@ -228,7 +228,7 @@ bb2: } ; This would crash from using the wrong insert point -define void @sink_null_insert_pt(i32 addrspace(4)* %arg0) { +define void @sink_null_insert_pt(ptr addrspace(4) %arg0) { ; GFX9-LABEL: sink_null_insert_pt: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -255,7 +255,7 @@ define void @sink_null_insert_pt(i32 addrspace(4)* %arg0) { ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] entry: - %load0 = load volatile i32, i32 addrspace(1)* null, align 4 + %load0 = load volatile i32, ptr addrspace(1) null, align 4 br label %bb1 bb1: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll index ff3f4363ee200..ab9ae86c5abcb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -o - %s | FileCheck %s -define amdgpu_cs void @test1(i32 %arg1, <4 x i32> inreg %arg2, i32, i32 addrspace(6)* inreg %arg3) { +define amdgpu_cs void @test1(i32 %arg1, <4 x i32> inreg %arg2, i32, ptr addrspace(6) inreg %arg3) { ; CHECK-LABEL: test1: ; CHECK: ; %bb.0: ; %.entry ; CHECK-NEXT: v_and_b32_e32 v3, 0x3ffffffc, v0 @@ -15,23 +15,23 @@ define amdgpu_cs void @test1(i32 %arg1, <4 x i32> inreg %arg2, i32, i32 addrspac ; CHECK-NEXT: s_endpgm .entry: %bs1 = and i32 %arg1, 1073741820 - %ep1 = 
getelementptr i32, i32 addrspace(6)* %arg3, i32 %bs1 - %ad1 = ptrtoint i32 addrspace(6)* %ep1 to i32 + %ep1 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs1 + %ad1 = ptrtoint ptr addrspace(6) %ep1 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 11, <4 x i32> %arg2, i32 %ad1, i32 0, i32 0) %bs2 = or i32 %bs1, 1 - %ep2 = getelementptr i32, i32 addrspace(6)* %arg3, i32 %bs2 - %ad2 = ptrtoint i32 addrspace(6)* %ep2 to i32 + %ep2 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs2 + %ad2 = ptrtoint ptr addrspace(6) %ep2 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 22, <4 x i32> %arg2, i32 %ad2, i32 0, i32 0) %bs3 = or i32 %bs1, 2 - %ep3 = getelementptr i32, i32 addrspace(6)* %arg3, i32 %bs3 - %ad3 = ptrtoint i32 addrspace(6)* %ep3 to i32 + %ep3 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs3 + %ad3 = ptrtoint ptr addrspace(6) %ep3 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 33, <4 x i32> %arg2, i32 %ad3, i32 0, i32 0) %bs4 = or i32 %bs1, 3 - %ep4 = getelementptr i32, i32 addrspace(6)* %arg3, i32 %bs4 - %ad4 = ptrtoint i32 addrspace(6)* %ep4 to i32 + %ep4 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs4 + %ad4 = ptrtoint ptr addrspace(6) %ep4 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 44, <4 x i32> %arg2, i32 %ad4, i32 0, i32 0) ret void @@ -50,23 +50,23 @@ define amdgpu_cs void @test2(i32 %arg1, <4 x i32> inreg %arg2) { ; CHECK-NEXT: s_endpgm .entry: %bs1 = and i32 %arg1, 1073741820 - %ep1 = getelementptr <{ [64 x i32] }>, <{ [64 x i32] }> addrspace(6)* null, i32 0, i32 0, i32 %bs1 - %ad1 = ptrtoint i32 addrspace(6)* %ep1 to i32 + %ep1 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs1 + %ad1 = ptrtoint ptr addrspace(6) %ep1 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 11, <4 x i32> %arg2, i32 %ad1, i32 0, i32 0) %bs2 = or i32 %bs1, 1 - %ep2 = getelementptr <{ [64 x i32] }>, <{ [64 x i32] }> addrspace(6)* null, i32 0, i32 0, i32 %bs2 - %ad2 = ptrtoint i32 addrspace(6)* %ep2 to i32 + %ep2 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs2 + %ad2 = ptrtoint ptr addrspace(6) %ep2 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 22, <4 x i32> %arg2, i32 %ad2, i32 0, i32 0) %bs3 = or i32 %bs1, 2 - %ep3 = getelementptr <{ [64 x i32] }>, <{ [64 x i32] }> addrspace(6)* null, i32 0, i32 0, i32 %bs3 - %ad3 = ptrtoint i32 addrspace(6)* %ep3 to i32 + %ep3 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs3 + %ad3 = ptrtoint ptr addrspace(6) %ep3 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 33, <4 x i32> %arg2, i32 %ad3, i32 0, i32 0) %bs4 = or i32 %bs1, 3 - %ep4 = getelementptr <{ [64 x i32] }>, <{ [64 x i32] }> addrspace(6)* null, i32 0, i32 0, i32 %bs4 - %ad4 = ptrtoint i32 addrspace(6)* %ep4 to i32 + %ep4 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs4 + %ad4 = ptrtoint ptr addrspace(6) %ep4 to i32 call void @llvm.amdgcn.raw.buffer.store.i32(i32 44, <4 x i32> %arg2, i32 %ad4, i32 0, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll index 7295442324b6f..4e1728a369dc0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mubuf-global.ll @@ -5,7 +5,7 @@ ; Test end to end matching of addressing modes when MUBUF is used for ; global memory. 
-define amdgpu_ps void @mubuf_store_sgpr_ptr(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps void @mubuf_store_sgpr_ptr(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_store_sgpr_ptr: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -25,11 +25,11 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr(i32 addrspace(1)* inreg %ptr) { ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm - store i32 0, i32 addrspace(1)* %ptr + store i32 0, ptr addrspace(1) %ptr ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -51,12 +51,12 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095(i32 addrspace(1)* inreg % ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 0 @@ -84,12 +84,12 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967296(i32 addrspace(1)* i ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4294967297: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 4 @@ -117,12 +117,12 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4294967297(i32 addrspace(1)* i ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967297 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967297 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4096(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4096(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4096: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -144,12 +144,12 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4096(i32 addrspace(1)* inreg % ; GFX7-NEXT: s_movk_i32 s4, 0x4000 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], s4 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4096 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4096 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4095(ptr addrspace(1) %ptr) { ; GFX6-LABEL: 
mubuf_store_vgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s2, 0 @@ -169,12 +169,12 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) { ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 0 @@ -194,12 +194,12 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967296(i32 addrspace(1)* % ; GFX7-NEXT: s_mov_b32 s2, s0 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(i32 addrspace(1)* %ptr) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4294967297: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 4 @@ -219,12 +219,12 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4294967297(i32 addrspace(1)* % ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967297 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967297 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4096(i32 addrspace(1)* %ptr) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4096(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_store_vgpr_ptr_offset4096: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s2, 0 @@ -244,12 +244,12 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_offset4096(i32 addrspace(1)* %ptr) { ; GFX7-NEXT: s_movk_i32 s4, 0x4000 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4096 - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4096 + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg %ptr, i32 inreg %soffset) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(ptr addrspace(1) inreg %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_sgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s5, s4, 31 @@ -277,12 +277,12 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_sgpr_offset(i32 addrspace(1)* inreg ; GFX7-NEXT: v_mov_b32_e32 v1, s5 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %soffset + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(ptr addrspace(1) %ptr, i32 
inreg %soffset) { ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s3, s2, 31 @@ -302,12 +302,12 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset(i32 addrspace(1)* %ptr, ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %soffset + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(ptr addrspace(1) %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset_offset256: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s3, s2, 31 @@ -327,13 +327,13 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset_offset256(i32 addrspace( ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024 ; GFX7-NEXT: s_endpgm - %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %soffset - %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 256 - store i32 0, i32 addrspace(1)* %gep1 + %gep0 = getelementptr i32, ptr addrspace(1) %ptr, i32 %soffset + %gep1 = getelementptr i32, ptr addrspace(1) %gep0, i32 256 + store i32 0, ptr addrspace(1) %gep1 ret void } -define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(ptr addrspace(1) %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_store_vgpr_ptr_sgpr_offset256_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s3, s2, 31 @@ -353,13 +353,13 @@ define amdgpu_ps void @mubuf_store_vgpr_ptr_sgpr_offset256_offset(i32 addrspace( ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 offset:1024 ; GFX7-NEXT: s_endpgm - %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 256 - %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %soffset - store i32 0, i32 addrspace(1)* %gep1 + %gep0 = getelementptr i32, ptr addrspace(1) %ptr, i32 256 + %gep1 = getelementptr i32, ptr addrspace(1) %gep0, i32 %soffset + store i32 0, ptr addrspace(1) %gep1 ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -383,12 +383,12 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 ; GFX7-NEXT: s_endpgm - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset - store i32 0, i32 addrspace(1)* %gep + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset + store i32 0, ptr addrspace(1) %gep ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset_offset4095(i32 addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset_offset4095(ptr addrspace(1) inreg %ptr, i32 %voffset) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_vgpr_offset_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -414,12 +414,12 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_vgpr_offset_offset4095(i32 
addrspace ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64 ; GFX7-NEXT: s_endpgm - %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset - %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 4095 - store i32 0, i32 addrspace(1)* %gep1 + %gep0 = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset + %gep1 = getelementptr i32, ptr addrspace(1) %gep0, i32 4095 + store i32 0, ptr addrspace(1) %gep1 ret void } -define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) { ; GFX6-LABEL: mubuf_store_sgpr_ptr_offset4095_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -445,13 +445,13 @@ define amdgpu_ps void @mubuf_store_sgpr_ptr_offset4095_vgpr_offset(i32 addrspace ; GFX7-NEXT: s_movk_i32 s4, 0x3ffc ; GFX7-NEXT: buffer_store_dword v2, v[0:1], s[0:3], s4 addr64 ; GFX7-NEXT: s_endpgm - %gep0 = getelementptr i32, i32 addrspace(1)* %ptr, i32 4095 - %gep1 = getelementptr i32, i32 addrspace(1)* %gep0, i32 %voffset - store i32 0, i32 addrspace(1)* %gep1 + %gep0 = getelementptr i32, ptr addrspace(1) %ptr, i32 4095 + %gep1 = getelementptr i32, ptr addrspace(1) %gep0, i32 %voffset + store i32 0, ptr addrspace(1) %gep1 ret void } -define amdgpu_ps float @mubuf_load_sgpr_ptr(float addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_load_sgpr_ptr(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_load_sgpr_ptr: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -471,11 +471,11 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr(float addrspace(1)* inreg %ptr) { ; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], 0 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %val = load volatile float, float addrspace(1)* %ptr + %val = load volatile float, ptr addrspace(1) %ptr ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(float addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -497,12 +497,12 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095(float addrspace(1)* inreg ; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s4 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4095 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4095 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(float addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 0 @@ -530,12 +530,12 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967296(float addrspace(1)* ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4294967296 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float 
@mubuf_load_sgpr_ptr_offset4294967297(float addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967297(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4294967297: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 4 @@ -563,12 +563,12 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4294967297(float addrspace(1)* ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4294967297 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4096(float addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4096(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4096: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -590,12 +590,12 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4096(float addrspace(1)* inreg ; GFX7-NEXT: buffer_load_dword v0, off, s[0:3], s4 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4096 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4096 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4095(float addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4095(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s2, 0 @@ -615,12 +615,12 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4095(float addrspace(1)* %ptr) ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4095 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4095 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(float addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 0 @@ -640,12 +640,12 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967296(float addrspace(1)* ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967296 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4294967296 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(float addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4294967297: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 4 @@ -665,12 +665,12 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4294967297(float addrspace(1)* ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return 
to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4294967297 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4294967297 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4096(float addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4096(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_offset4096: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s2, 0 @@ -690,12 +690,12 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_offset4096(float addrspace(1)* %ptr) ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i64 4096 - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i64 4096 + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(float addrspace(1)* inreg %ptr, i32 inreg %soffset) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(ptr addrspace(1) inreg %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_sgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s5, s4, 31 @@ -723,12 +723,12 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_sgpr_offset(float addrspace(1)* inre ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i32 %soffset + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(float addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(ptr addrspace(1) %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s3, s2, 31 @@ -748,12 +748,12 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset(float addrspace(1)* %ptr ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i32 %soffset - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i32 %soffset + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(ptr addrspace(1) %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset_offset256: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s3, s2, 31 @@ -773,13 +773,13 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset_offset256(float addrspac ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %soffset - %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 256 - %val = load volatile float, float addrspace(1)* %gep1 + %gep0 = getelementptr float, ptr addrspace(1) %ptr, i32 %soffset + %gep1 = getelementptr float, ptr 
addrspace(1) %gep0, i32 256 + %val = load volatile float, ptr addrspace(1) %gep1 ret float %val } -define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspace(1)* %ptr, i32 inreg %soffset) { +define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(ptr addrspace(1) %ptr, i32 inreg %soffset) { ; GFX6-LABEL: mubuf_load_vgpr_ptr_sgpr_offset256_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_ashr_i32 s3, s2, 31 @@ -799,13 +799,13 @@ define amdgpu_ps float @mubuf_load_vgpr_ptr_sgpr_offset256_offset(float addrspac ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 offset:1024 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 256 - %gep1 = getelementptr float, float addrspace(1)* %gep0, i32 %soffset - %val = load volatile float, float addrspace(1)* %gep1 + %gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 256 + %gep1 = getelementptr float, ptr addrspace(1) %gep0, i32 %soffset + %val = load volatile float, ptr addrspace(1) %gep1 ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -829,12 +829,12 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset(float addrspace(1)* inre ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], 0 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr float, float addrspace(1)* %ptr, i32 %voffset - %val = load volatile float, float addrspace(1)* %gep + %gep = getelementptr float, ptr addrspace(1) %ptr, i32 %voffset + %val = load volatile float, ptr addrspace(1) %gep ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset_offset4095(float addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset_offset4095(ptr addrspace(1) inreg %ptr, i32 %voffset) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_vgpr_offset_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -860,12 +860,12 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_vgpr_offset_offset4095(float addrspa ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep0 = getelementptr float, float addrspace(1)* %ptr, i32 %voffset - %gep1 = getelementptr float, float addrspace(1)* %gep0, i64 4095 - %val = load volatile float, float addrspace(1)* %gep1 + %gep0 = getelementptr float, ptr addrspace(1) %ptr, i32 %voffset + %gep1 = getelementptr float, ptr addrspace(1) %gep0, i64 4095 + %val = load volatile float, ptr addrspace(1) %gep1 ret float %val } -define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(float addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) { ; GFX6-LABEL: mubuf_load_sgpr_ptr_offset4095_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -891,13 +891,13 @@ define amdgpu_ps float @mubuf_load_sgpr_ptr_offset4095_vgpr_offset(float addrspa ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[0:3], s4 addr64 glc ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: ; return to shader part epilog - %gep0 = getelementptr float, float addrspace(1)* %ptr, i64 4095 - %gep1 = 
getelementptr float, float addrspace(1)* %gep0, i32 %voffset - %val = load volatile float, float addrspace(1)* %gep1 + %gep0 = getelementptr float, ptr addrspace(1) %ptr, i64 4095 + %gep1 = getelementptr float, ptr addrspace(1) %gep0, i32 %voffset + %val = load volatile float, ptr addrspace(1) %gep1 ret float %val } -define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -926,13 +926,13 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4095(i32 addrspace(1)* in ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 - %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095 + %result = atomicrmw add ptr addrspace(1) %gep, i32 2 seq_cst %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr) { +define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr) { ; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 0 @@ -967,13 +967,13 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_offset4294967296(i32 addrspace( ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 - %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296 + %result = atomicrmw add ptr addrspace(1) %gep, i32 2 seq_cst %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v2, 2 @@ -1002,13 +1002,13 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4095(i32 addrspace(1)* %p ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v2 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 - %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095 + %result = atomicrmw add ptr addrspace(1) %gep, i32 2 seq_cst %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr) { +define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(ptr addrspace(1) %ptr) { ; GFX6-LABEL: mubuf_atomicrmw_vgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 0 @@ -1037,13 +1037,13 @@ define amdgpu_ps float @mubuf_atomicrmw_vgpr_ptr_offset4294967296(i32 addrspace( ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v2 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 - %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296 + %result = atomicrmw add ptr addrspace(1) %gep, i32 2 seq_cst %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps 
float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset) { +define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset) { ; GFX6-LABEL: mubuf_atomicrmw_sgpr_ptr_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v0 @@ -1076,13 +1076,13 @@ define amdgpu_ps float @mubuf_atomicrmw_sgpr_ptr_vgpr_offset(i32 addrspace(1)* i ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v2 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset - %result = atomicrmw add i32 addrspace(1)* %gep, i32 2 seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset + %result = atomicrmw add ptr addrspace(1) %gep, i32 2 seq_cst %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) { +define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(ptr addrspace(1) inreg %ptr, i32 %old, i32 %in) { ; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, s2 @@ -1113,14 +1113,14 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4095(i32 addrspace(1)* inre ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v1 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 - %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095 + %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst %result = extractvalue { i32, i1 } %result.struct, 0 %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(i32 addrspace(1)* inreg %ptr, i32 %old, i32 %in) { +define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(ptr addrspace(1) inreg %ptr, i32 %old, i32 %in) { ; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s4, 0 @@ -1157,14 +1157,14 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_offset4294967296(i32 addrspace(1) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v1 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 - %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296 + %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst %result = extractvalue { i32, i1 } %result.struct, 0 %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr, i32 %old, i32 %in) { +define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(ptr addrspace(1) %ptr, i32 %old, i32 %in) { ; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4095: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v4, v2 @@ -1193,14 +1193,14 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4095(i32 addrspace(1)* %ptr ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v3 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4095 - %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4095 + %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst %result = extractvalue { i32, i1 } 
%result.struct, 0 %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(i32 addrspace(1)* %ptr, i32 %old, i32 %in) { +define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(ptr addrspace(1) %ptr, i32 %old, i32 %in) { ; GFX6-LABEL: mubuf_cmpxchg_vgpr_ptr_offset4294967296: ; GFX6: ; %bb.0: ; GFX6-NEXT: s_mov_b32 s0, 0 @@ -1229,14 +1229,14 @@ define amdgpu_ps float @mubuf_cmpxchg_vgpr_ptr_offset4294967296(i32 addrspace(1) ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v3 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4294967296 - %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4294967296 + %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst %result = extractvalue { i32, i1 } %result.struct, 0 %cast = bitcast i32 %result to float ret float %cast } -define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inreg %ptr, i32 %voffset, i32 %old, i32 %in) { +define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(ptr addrspace(1) inreg %ptr, i32 %voffset, i32 %old, i32 %in) { ; GFX6-LABEL: mubuf_cmpxchg_sgpr_ptr_vgpr_offset: ; GFX6: ; %bb.0: ; GFX6-NEXT: v_mov_b32_e32 v3, v1 @@ -1269,8 +1269,8 @@ define amdgpu_ps float @mubuf_cmpxchg_sgpr_ptr_vgpr_offset(i32 addrspace(1)* inr ; GFX7-NEXT: buffer_wbinvl1 ; GFX7-NEXT: v_mov_b32_e32 v0, v2 ; GFX7-NEXT: ; return to shader part epilog - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 %voffset - %result.struct = cmpxchg i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 %voffset + %result.struct = cmpxchg ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst %result = extractvalue { i32, i1 } %result.struct, 0 %cast = bitcast i32 %result to float ret float %cast diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll index bf0758a645eba..fec564f9b45db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll @@ -10,7 +10,7 @@ ; FIXME: FunctionLoweringInfo unhelpfully doesn't preserve an ; alignment less than the stack alignment. -define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align4(i32 addrspace(1)* %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) { +define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align4(ptr addrspace(1) %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) { ; GCN-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align4: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dword s6, s[4:5], 0x8 @@ -56,24 +56,23 @@ entry: bb.0: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 + %gep1 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1 %cond1 = icmp eq i32 %arg.cond1, 0 br i1 %cond1, label %bb.1, label %bb.2 bb.1: ; Use the alloca outside of the defining block. 
- store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in - %load = load i32, i32 addrspace(5)* %gep2 + store i32 0, ptr addrspace(5) %alloca + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %in + %load = load i32, ptr addrspace(5) %gep2 %tid = call i32 @llvm.amdgcn.workitem.id.x() %add = add i32 %load, %tid - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out br label %bb.2 bb.2: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } ; DEFAULTSIZE: .amdhsa_private_segment_fixed_size 4112 @@ -82,7 +81,7 @@ bb.2: ; ASSUME1024: .amdhsa_private_segment_fixed_size 1040 ; ASSUME1024: ; ScratchSize: 1040 -define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align64(i32 addrspace(1)* %out, i32 %arg.cond, i32 %in) { +define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align64(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) { ; GCN-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dword s6, s[4:5], 0x8 @@ -123,19 +122,18 @@ entry: bb.0: %alloca = alloca [16 x i32], align 64, addrspace(5) - %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in - %load = load i32, i32 addrspace(5)* %gep2 + %gep1 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1 + store i32 0, ptr addrspace(5) %alloca + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %in + %load = load i32, ptr addrspace(5) %gep2 %tid = call i32 @llvm.amdgcn.workitem.id.x() %add = add i32 %load, %tid - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out br label %bb.1 bb.1: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } @@ -146,7 +144,7 @@ bb.1: ; ASSUME1024: ; ScratchSize: 1088 -define void @func_non_entry_block_static_alloca_align4(i32 addrspace(1)* %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) { +define void @func_non_entry_block_static_alloca_align4(ptr addrspace(1) %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) { ; GCN-LABEL: func_non_entry_block_static_alloca_align4: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -189,28 +187,27 @@ entry: bb.0: %alloca = alloca [16 x i32], align 4, addrspace(5) - %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 + %gep1 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1 %cond1 = icmp eq i32 %arg.cond1, 0 br i1 %cond1, label %bb.1, label %bb.2 bb.1: ; Use the alloca outside of the defining block. 
- store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in - %load = load i32, i32 addrspace(5)* %gep2 + store i32 0, ptr addrspace(5) %alloca + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %in + %load = load i32, ptr addrspace(5) %gep2 %tid = call i32 @llvm.amdgcn.workitem.id.x() %add = add i32 %load, %tid - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out br label %bb.2 bb.2: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } -define void @func_non_entry_block_static_alloca_align64(i32 addrspace(1)* %out, i32 %arg.cond, i32 %in) { +define void @func_non_entry_block_static_alloca_align64(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) { ; GCN-LABEL: func_non_entry_block_static_alloca_align64: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -250,19 +247,18 @@ entry: bb.0: %alloca = alloca [16 x i32], align 64, addrspace(5) - %gep0 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 0 - %gep1 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %in - %load = load i32, i32 addrspace(5)* %gep2 + %gep1 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 1 + store i32 0, ptr addrspace(5) %alloca + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %in + %load = load i32, ptr addrspace(5) %gep2 %tid = call i32 @llvm.amdgcn.workitem.id.x() %add = add i32 %load, %tid - store i32 %add, i32 addrspace(1)* %out + store i32 %add, ptr addrspace(1) %out br label %bb.1 bb.1: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll index 3e4b0d4fa1e1d..7b222fa054f12 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll @@ -21,7 +21,7 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_vgpr_vaddr__sgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): @@ -40,10 +40,10 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD 
intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY8]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } @@ -67,7 +67,7 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_sgpr_vaddr__sgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): @@ -87,10 +87,10 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__sgpr_srsrc(<8 x i32> inreg %rsrc, i32 ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(s32) = COPY [[COPY8]](s32) ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, [[COPY9]](s32), [[BUILD_VECTOR]](<8 x s32>), 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } @@ -155,7 +155,7 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: ; FAST-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_vgpr_vaddr__vgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): @@ -216,10 +216,10 @@ define amdgpu_ps void @load_1d_vgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 %s) { ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[COPY9:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE 
[[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY9]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } @@ -285,7 +285,7 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: ; FAST-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: load_1d_sgpr_vaddr__vgpr_srsrc ; GREEDY: bb.1 (%ir-block.0): @@ -347,10 +347,10 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: ; GREEDY-NEXT: [[COPY10:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[COPY10]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll index 8f62f75b97c6f..0d7496cb10e08 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll @@ -25,7 +25,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; FAST-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp ; GREEDY: bb.1 (%ir-block.0): @@ -48,10 +48,10 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; GREEDY-NEXT: [[COPY12:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY12]](s32), 
[[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } @@ -79,7 +79,7 @@ define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; FAST-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; FAST-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32) ; FAST-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp ; GREEDY: bb.1 (%ir-block.0): @@ -103,10 +103,10 @@ define amdgpu_ps void @sample_1d_sgpr_vaddr__sgpr_rsrc__sgpr_samp(<8 x i32> inre ; GREEDY-NEXT: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF ; GREEDY-NEXT: [[COPY13:%[0-9]+]]:vgpr(s32) = COPY [[COPY12]](s32) ; GREEDY-NEXT: [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:vgpr(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.sample.1d), 15, [[COPY13]](s32), [[BUILD_VECTOR]](<8 x s32>), [[BUILD_VECTOR1]](<4 x s32>), 0, 0, 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } @@ -175,7 +175,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp ; GREEDY: bb.1 (%ir-block.0): @@ -240,10 +240,10 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__sgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: 
G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } @@ -304,7 +304,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp ; GREEDY: bb.1 (%ir-block.0): @@ -361,10 +361,10 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__sgpr_rsrc__vgpr_samp(<8 x i32> inre ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } @@ -445,7 +445,7 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; FAST-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; FAST-NEXT: {{ $}} ; FAST-NEXT: bb.5: - ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; FAST-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; FAST-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp ; GREEDY: bb.1 (%ir-block.0): @@ -522,10 +522,10 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr ; GREEDY-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] ; GREEDY-NEXT: {{ $}} ; GREEDY-NEXT: bb.5: - ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `<4 x float> addrspace(1)* undef`, addrspace 1) + ; GREEDY-NEXT: G_STORE [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>), [[DEF]](p1) :: (store (<4 x s32>) into `ptr addrspace(1) undef`, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* undef + store <4 x float> %v, ptr addrspace(1) undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll index b941a250004eb..f0b75187ad874 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll @@ -641,7 +641,7 @@ define amdgpu_ps void @s_buffer_load_i96_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) - ; CHECK-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_i96_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -658,10 +658,10 @@ define amdgpu_ps void @s_buffer_load_i96_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 0, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s96) = G_TRUNC [[AMDGPU_BUFFER_LOAD]](s128) - ; GREEDY-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `i96 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY-NEXT: G_STORE [[TRUNC]](s96), [[DEF]](p1) :: (store (s96) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %val = call i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32> %rsrc, i32 %soffset, i32 0) - store i96 %val, i96 addrspace(1)* undef + store i96 %val, ptr addrspace(1) undef ret void } @@ -684,10 +684,10 @@ define amdgpu_ps void @s_buffer_load_i256_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) - ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_i256_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -706,13 +706,13 @@ define amdgpu_ps void @s_buffer_load_i256_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s256) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128) ; GREEDY-NEXT: 
[[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s256) - ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i256 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i256 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %val = call i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32> %rsrc, i32 %soffset, i32 0) - store i256 %val, i256 addrspace(1)* undef + store i256 %val, ptr addrspace(1) undef ret void } @@ -737,16 +737,16 @@ define amdgpu_ps void @s_buffer_load_i512_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES [[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) - ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK-NEXT: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 32, align 8, addrspace 1) ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 48, align 8, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_i512_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -767,19 +767,19 @@ define amdgpu_ps void @s_buffer_load_i512_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(s128) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[MV:%[0-9]+]]:vgpr(s512) = G_MERGE_VALUES 
[[AMDGPU_BUFFER_LOAD]](s128), [[AMDGPU_BUFFER_LOAD1]](s128), [[AMDGPU_BUFFER_LOAD2]](s128), [[AMDGPU_BUFFER_LOAD3]](s128) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(s128), [[UV1:%[0-9]+]]:vgpr(s128), [[UV2:%[0-9]+]]:vgpr(s128), [[UV3:%[0-9]+]]:vgpr(s128) = G_UNMERGE_VALUES [[MV]](s512) - ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `i512 addrspace(1)* undef`, align 8, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](s128), [[DEF]](p1) :: (store (s128) into `ptr addrspace(1) undef`, align 8, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 16, align 8, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](s128), [[PTR_ADD]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 16, align 8, addrspace 1) ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY-NEXT: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 32, align 8, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV2]](s128), [[PTR_ADD1]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 32, align 8, addrspace 1) ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY-NEXT: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `i512 addrspace(1)* undef` + 48, align 8, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV3]](s128), [[PTR_ADD2]](p1) :: (store (s128) into `ptr addrspace(1) undef` + 48, align 8, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %val = call i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32> %rsrc, i32 %soffset, i32 0) - store i512 %val, i512 addrspace(1)* undef + store i512 %val, ptr addrspace(1) undef ret void } @@ -802,10 +802,10 @@ define amdgpu_ps void @s_buffer_load_v16i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) - ; CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v16i16_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -824,13 +824,13 @@ define amdgpu_ps void @s_buffer_load_v16i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x 
s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<16 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s16>) - ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<16 x i16> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %val = call <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32> %rsrc, i32 %soffset, i32 0) - store <16 x i16> %val, <16 x i16> addrspace(1)* undef + store <16 x i16> %val, ptr addrspace(1) undef ret void } @@ -855,16 +855,16 @@ define amdgpu_ps void @s_buffer_load_v32i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) - ; CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 64, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK-NEXT: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV3]](<8 x 
s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v32i16_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -885,19 +885,19 @@ define amdgpu_ps void @s_buffer_load_v32i16_vgpr_offset(<4 x i32> inreg %rsrc, i ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<8 x s16>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<32 x s16>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<8 x s16>), [[AMDGPU_BUFFER_LOAD1]](<8 x s16>), [[AMDGPU_BUFFER_LOAD2]](<8 x s16>), [[AMDGPU_BUFFER_LOAD3]](<8 x s16>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<8 x s16>), [[UV1:%[0-9]+]]:vgpr(<8 x s16>), [[UV2:%[0-9]+]]:vgpr(<8 x s16>), [[UV3:%[0-9]+]]:vgpr(<8 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<32 x s16>) - ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](<8 x s16>), [[DEF]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef`, align 64, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](<8 x s16>), [[PTR_ADD]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY-NEXT: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV2]](<8 x s16>), [[PTR_ADD1]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY-NEXT: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `<32 x i16> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV3]](<8 x s16>), [[PTR_ADD2]](p1) :: (store (<8 x s16>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %val = call <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32> %rsrc, i32 %soffset, i32 0) - store <32 x i16> %val, <32 x i16> addrspace(1)* undef + store <32 x i16> %val, ptr addrspace(1) undef ret void } @@ -920,10 +920,10 @@ define amdgpu_ps void @s_buffer_load_v4i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) - ; CHECK-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: 
G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v4i64_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -942,13 +942,13 @@ define amdgpu_ps void @s_buffer_load_v4i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x s64>) - ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<4 x i64> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %val = call <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32> %rsrc, i32 %soffset, i32 0) - store <4 x i64> %val, <4 x i64> addrspace(1)* undef + store <4 x i64> %val, ptr addrspace(1) undef ret void } @@ -973,16 +973,16 @@ define amdgpu_ps void @s_buffer_load_v8i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) - ; CHECK-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 64, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 
x s64>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK-NEXT: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v8i64_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -1003,19 +1003,19 @@ define amdgpu_ps void @s_buffer_load_v8i64_vgpr_offset(<4 x i32> inreg %rsrc, i3 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x s64>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x s64>), [[AMDGPU_BUFFER_LOAD1]](<2 x s64>), [[AMDGPU_BUFFER_LOAD2]](<2 x s64>), [[AMDGPU_BUFFER_LOAD3]](<2 x s64>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x s64>), [[UV1:%[0-9]+]]:vgpr(<2 x s64>), [[UV2:%[0-9]+]]:vgpr(<2 x s64>), [[UV3:%[0-9]+]]:vgpr(<2 x s64>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x s64>) - ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x s64>), [[DEF]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef`, align 64, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x s64>), [[PTR_ADD]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY-NEXT: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV2]](<2 x s64>), [[PTR_ADD1]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY-NEXT: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `<8 x i64> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV3]](<2 x s64>), [[PTR_ADD2]](p1) :: (store (<2 x s64>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 %val = call <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32> %rsrc, i32 %soffset, i32 0) - store 
<8 x i64> %val, <8 x i64> addrspace(1)* undef + store <8 x i64> %val, ptr addrspace(1) undef ret void } @@ -1038,10 +1038,10 @@ define amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) - ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v4p1_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -1060,13 +1060,13 @@ define amdgpu_ps void @s_buffer_load_v4p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD1:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 16, 0, 0 :: (dereferenceable invariant load (s128), align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<4 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<4 x p1>) - ; GREEDY-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef`, align 32, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 32, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<4 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 32, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 16, basealign 32, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 - %val = call <4 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32> %rsrc, i32 %soffset, i32 0) - store <4 x i8 addrspace(1)*> %val, <4 x i8 addrspace(1)*> addrspace(1)* undef + %val = call <4 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v4p1(<4 x i32> %rsrc, i32 %soffset, i32 0) + store <4 x ptr addrspace(1)> %val, ptr addrspace(1) undef ret void } @@ -1091,16 +1091,16 @@ define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; CHECK-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; 
CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) - ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 64, addrspace 1) ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; CHECK-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 16, basealign 64, addrspace 1) ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; CHECK-NEXT: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) ; CHECK-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; CHECK-NEXT: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; CHECK-NEXT: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; CHECK-NEXT: S_ENDPGM 0 ; GREEDY-LABEL: name: s_buffer_load_v8p1_vgpr_offset ; GREEDY: bb.1 (%ir-block.0): @@ -1121,19 +1121,19 @@ define amdgpu_ps void @s_buffer_load_v8p1_vgpr_offset(<4 x i32> inreg %rsrc, i32 ; GREEDY-NEXT: [[AMDGPU_BUFFER_LOAD3:%[0-9]+]]:vgpr(<2 x p1>) = G_AMDGPU_BUFFER_LOAD [[BUILD_VECTOR]](<4 x s32>), [[C1]](s32), [[COPY4]], [[C]], 48, 0, 0 :: (dereferenceable invariant load (s128) from unknown-address + 48, align 4) ; GREEDY-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:vgpr(<8 x p1>) = G_CONCAT_VECTORS [[AMDGPU_BUFFER_LOAD]](<2 x p1>), [[AMDGPU_BUFFER_LOAD1]](<2 x p1>), [[AMDGPU_BUFFER_LOAD2]](<2 x p1>), [[AMDGPU_BUFFER_LOAD3]](<2 x p1>) ; GREEDY-NEXT: [[UV:%[0-9]+]]:vgpr(<2 x p1>), [[UV1:%[0-9]+]]:vgpr(<2 x p1>), [[UV2:%[0-9]+]]:vgpr(<2 x p1>), [[UV3:%[0-9]+]]:vgpr(<2 x p1>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<8 x p1>) - ; GREEDY-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef`, align 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, align 64, addrspace 1) ; GREEDY-NEXT: [[C2:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 16 ; GREEDY-NEXT: [[PTR_ADD:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) - ; GREEDY-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 16, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV1]](<2 x p1>), [[PTR_ADD]](p1) :: (store (<2 x p1>) into `ptr 
addrspace(1) undef` + 16, basealign 64, addrspace 1) ; GREEDY-NEXT: [[C3:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 32 ; GREEDY-NEXT: [[PTR_ADD1:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C3]](s64) - ; GREEDY-NEXT: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 32, align 32, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV2]](<2 x p1>), [[PTR_ADD1]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 32, align 32, basealign 64, addrspace 1) ; GREEDY-NEXT: [[C4:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 48 ; GREEDY-NEXT: [[PTR_ADD2:%[0-9]+]]:sgpr(p1) = G_PTR_ADD [[DEF]], [[C4]](s64) - ; GREEDY-NEXT: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `<8 x i8 addrspace(1)*> addrspace(1)* undef` + 48, basealign 64, addrspace 1) + ; GREEDY-NEXT: G_STORE [[UV3]](<2 x p1>), [[PTR_ADD2]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef` + 48, basealign 64, addrspace 1) ; GREEDY-NEXT: S_ENDPGM 0 - %val = call <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32> %rsrc, i32 %soffset, i32 0) - store <8 x i8 addrspace(1)*> %val, <8 x i8 addrspace(1)*> addrspace(1)* undef + %val = call <8 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v8p1(<4 x i32> %rsrc, i32 %soffset, i32 0) + store <8 x ptr addrspace(1)> %val, ptr addrspace(1) undef ret void } @@ -3161,5 +3161,5 @@ declare <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32>, i32, i32 immarg) declare <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32>, i32, i32 immarg) declare <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32>, i32, i32 immarg) -declare <4 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v4p1i8(<4 x i32>, i32, i32 immarg) -declare <8 x i8 addrspace(1)*> @llvm.amdgcn.s.buffer.load.v8p1i8(<4 x i32>, i32, i32 immarg) +declare <4 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v4p1(<4 x i32>, i32, i32 immarg) +declare <8 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v8p1(<4 x i32>, i32, i32 immarg) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll index b3a3d6394ce03..a922e0b46dfed 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s -define amdgpu_kernel void @sdivrem_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) { +define amdgpu_kernel void @sdivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %x, i32 %y) { ; GFX8-LABEL: sdivrem_i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 @@ -136,13 +136,13 @@ define amdgpu_kernel void @sdivrem_i32(i32 addrspace(1)* %out0, i32 addrspace(1) ; GFX10-NEXT: global_store_dword v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = sdiv i32 %x, %y - store i32 %div, i32 addrspace(1)* %out0 + store i32 %div, ptr addrspace(1) %out0 %rem = srem i32 %x, %y - store i32 %rem, i32 addrspace(1)* %out1 + store i32 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1)* %out1, i64 %x, i64 %y) { +define amdgpu_kernel void @sdivrem_i64(ptr addrspace(1) %out0, ptr 
addrspace(1) %out1, i64 %x, i64 %y) { ; GFX8-LABEL: sdivrem_i64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 @@ -613,13 +613,13 @@ define amdgpu_kernel void @sdivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1) ; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] ; GFX10-NEXT: s_endpgm %div = sdiv i64 %x, %y - store i64 %div, i64 addrspace(1)* %out0 + store i64 %div, ptr addrspace(1) %out0 %rem = srem i64 %x, %y - store i64 %rem, i64 addrspace(1)* %out1 + store i64 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32> addrspace(1)* %out1, <2 x i32> %x, <2 x i32> %y) { +define amdgpu_kernel void @sdivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i32> %x, <2 x i32> %y) { ; GFX8-LABEL: sdivrem_v2i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 @@ -842,13 +842,13 @@ define amdgpu_kernel void @sdivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32 ; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[6:7] ; GFX10-NEXT: s_endpgm %div = sdiv <2 x i32> %x, %y - store <2 x i32> %div, <2 x i32> addrspace(1)* %out0 + store <2 x i32> %div, ptr addrspace(1) %out0 %rem = srem <2 x i32> %x, %y - store <2 x i32> %rem, <2 x i32> addrspace(1)* %out1 + store <2 x i32> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32> addrspace(1)* %out1, <4 x i32> %x, <4 x i32> %y) { +define amdgpu_kernel void @sdivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i32> %x, <4 x i32> %y) { ; GFX8-LABEL: sdivrem_v4i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10 @@ -1268,13 +1268,13 @@ define amdgpu_kernel void @sdivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32 ; GFX10-NEXT: global_store_dwordx4 v8, v[4:7], s[6:7] ; GFX10-NEXT: s_endpgm %div = sdiv <4 x i32> %x, %y - store <4 x i32> %div, <4 x i32> addrspace(1)* %out0 + store <4 x i32> %div, ptr addrspace(1) %out0 %rem = srem <4 x i32> %x, %y - store <4 x i32> %rem, <4 x i32> addrspace(1)* %out1 + store <4 x i32> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64> addrspace(1)* %out1, <2 x i64> %x, <2 x i64> %y) { +define amdgpu_kernel void @sdivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i64> %x, <2 x i64> %y) { ; GFX8-LABEL: sdivrem_v2i64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10 @@ -2191,13 +2191,13 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: global_store_dwordx4 v16, v[4:7], s[6:7] ; GFX10-NEXT: s_endpgm %div = sdiv <2 x i64> %x, %y - store <2 x i64> %div, <2 x i64> addrspace(1)* %out0 + store <2 x i64> %div, ptr addrspace(1) %out0 %rem = srem <2 x i64> %x, %y - store <2 x i64> %rem, <2 x i64> addrspace(1)* %out1 + store <2 x i64> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out1, i8 %x, i8 %y) { +define amdgpu_kernel void @sdiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i8 %x, i8 %y) { ; GFX8-LABEL: sdiv_i8: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s6, s[4:5], 0x10 @@ -2336,13 +2336,13 @@ define amdgpu_kernel void @sdiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out ; GFX10-NEXT: global_store_byte v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = sdiv i8 %x, %y - store i8 %div, i8 addrspace(1)* %out0 + store i8 %div, ptr addrspace(1) %out0 %rem = srem i8 %x, %y - store i8 %rem, i8 
addrspace(1)* %out1 + store i8 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> addrspace(1)* %out1, <2 x i8> %x, <2 x i8> %y) { +define amdgpu_kernel void @sdivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i8> %x, <2 x i8> %y) { ; GFX8-LABEL: sdivrem_v2i8: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s2, s[4:5], 0x10 @@ -2600,13 +2600,13 @@ define amdgpu_kernel void @sdivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> a ; GFX10-NEXT: global_store_short v1, v2, s[6:7] ; GFX10-NEXT: s_endpgm %div = sdiv <2 x i8> %x, %y - store <2 x i8> %div, <2 x i8> addrspace(1)* %out0 + store <2 x i8> %div, ptr addrspace(1) %out0 %rem = srem <2 x i8> %x, %y - store <2 x i8> %rem, <2 x i8> addrspace(1)* %out1 + store <2 x i8> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out0, i16 addrspace(1)* %out1, i16 %x, i16 %y) { +define amdgpu_kernel void @sdiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i16 %x, i16 %y) { ; GFX8-LABEL: sdiv_i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s6, s[4:5], 0x10 @@ -2745,13 +2745,13 @@ define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out0, i16 addrspace(1)* % ; GFX10-NEXT: global_store_short v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = sdiv i16 %x, %y - store i16 %div, i16 addrspace(1)* %out0 + store i16 %div, ptr addrspace(1) %out0 %rem = srem i16 %x, %y - store i16 %rem, i16 addrspace(1)* %out1 + store i16 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> %x, <2 x i16> %y) { +define amdgpu_kernel void @sdivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i16> %x, <2 x i16> %y) { ; GFX8-LABEL: sdivrem_v2i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x10 @@ -3003,13 +3003,13 @@ define amdgpu_kernel void @sdivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16 ; GFX10-NEXT: global_store_dword v1, v2, s[6:7] ; GFX10-NEXT: s_endpgm %div = sdiv <2 x i16> %x, %y - store <2 x i16> %div, <2 x i16> addrspace(1)* %out0 + store <2 x i16> %div, ptr addrspace(1) %out0 %rem = srem <2 x i16> %x, %y - store <2 x i16> %rem, <2 x i16> addrspace(1)* %out1 + store <2 x i16> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* %out1, i3 %x, i3 %y) { +define amdgpu_kernel void @sdivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i3 %x, i3 %y) { ; GFX8-LABEL: sdivrem_i3: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s6, s[4:5], 0x10 @@ -3154,13 +3154,13 @@ define amdgpu_kernel void @sdivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX10-NEXT: global_store_byte v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = sdiv i3 %x, %y - store i3 %div, i3 addrspace(1)* %out0 + store i3 %div, ptr addrspace(1) %out0 %rem = srem i3 %x, %y - store i3 %rem, i3 addrspace(1)* %out1 + store i3 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @sdivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { +define amdgpu_kernel void @sdivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i27 %x, i27 %y) { ; GFX8-LABEL: sdivrem_i27: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 @@ -3305,8 +3305,8 @@ define amdgpu_kernel void @sdivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1) ; GFX10-NEXT: global_store_dword v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = sdiv i27 %x, %y - store i27 %div, i27 addrspace(1)* 
%out0 + store i27 %div, ptr addrspace(1) %out0 %rem = srem i27 %x, %y - store i27 %rem, i27 addrspace(1)* %out1 + store i27 %rem, ptr addrspace(1) %out1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll index 8f300f2baccc4..cd74035afee75 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll @@ -242,7 +242,7 @@ define i64 @v_shl_i64_sext_i32_overflow(i32 %x) { ret i64 %shl } -define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) { +define amdgpu_kernel void @mulu24_shl64(ptr addrspace(1) nocapture %arg) { ; GFX7-LABEL: mulu24_shl64: ; GFX7: ; %bb.0: ; %bb ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0 @@ -319,12 +319,12 @@ bb: %tmp1 = and i32 %tmp, 6 %mulconv = mul nuw nsw i32 %tmp1, 7 %tmp2 = zext i32 %mulconv to i64 - %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp2 - store i32 0, i32 addrspace(1)* %tmp3, align 4 + %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg, i64 %tmp2 + store i32 0, ptr addrspace(1) %tmp3, align 4 ret void } -define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 addrspace(1)* nocapture readonly %arg1) { +define amdgpu_kernel void @muli24_shl64(ptr addrspace(1) nocapture %arg, ptr addrspace(1) nocapture readonly %arg1) { ; GFX7-LABEL: muli24_shl64: ; GFX7: ; %bb.0: ; %bb ; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0 @@ -417,14 +417,14 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad bb: %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() %tmp2 = sext i32 %tmp to i64 - %tmp3 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp2 - %tmp4 = load i32, i32 addrspace(1)* %tmp3, align 4 + %tmp3 = getelementptr inbounds i32, ptr addrspace(1) %arg1, i64 %tmp2 + %tmp4 = load i32, ptr addrspace(1) %tmp3, align 4 %tmp5 = or i32 %tmp4, -8388608 %tmp6 = mul nsw i32 %tmp5, -7 %tmp7 = zext i32 %tmp6 to i64 %tmp8 = shl nuw nsw i64 %tmp7, 3 - %tmp9 = getelementptr inbounds i64, i64 addrspace(1)* %arg, i64 %tmp2 - store i64 %tmp8, i64 addrspace(1)* %tmp9, align 8 + %tmp9 = getelementptr inbounds i64, ptr addrspace(1) %arg, i64 %tmp2 + store i64 %tmp8, ptr addrspace(1) %tmp9, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll index 836ce947fbfa2..9bc6189503887 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll @@ -9,11 +9,11 @@ ; GCN-LABEL: {{^}}smrd0: ; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x1 ; encoding: [0x01 ; VIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x4 -define amdgpu_kernel void @smrd0(i32 addrspace(4)* %ptr) { +define amdgpu_kernel void @smrd0(ptr addrspace(4) %ptr) { entry: - %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 1 - %1 = load i32, i32 addrspace(4)* %0 - store i32 %1, i32 addrspace(1)* undef + %0 = getelementptr i32, ptr addrspace(4) %ptr, i64 1 + %1 = load i32, ptr addrspace(4) %0 + store i32 %1, ptr addrspace(1) undef ret void } @@ -21,11 +21,11 @@ entry: ; GCN-LABEL: {{^}}smrd1: ; SICI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xff ; encoding: [0xff,0x{{[0-9]+[137]}} ; VIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x3fc -define amdgpu_kernel void @smrd1(i32 addrspace(4)* %ptr) { +define amdgpu_kernel void @smrd1(ptr addrspace(4) %ptr) { entry: - %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 255 - %1 = load i32, i32 
addrspace(4)* %0 - store i32 %1, i32 addrspace(1)* undef + %0 = getelementptr i32, ptr addrspace(4) %ptr, i64 255 + %1 = load i32, ptr addrspace(4) %0 + store i32 %1, ptr addrspace(1) undef ret void } @@ -36,11 +36,11 @@ entry: ; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x100 ; VIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x400 ; GCN: s_endpgm -define amdgpu_kernel void @smrd2(i32 addrspace(4)* %ptr) { +define amdgpu_kernel void @smrd2(ptr addrspace(4) %ptr) { entry: - %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 256 - %1 = load i32, i32 addrspace(4)* %0 - store i32 %1, i32 addrspace(1)* undef + %0 = getelementptr i32, ptr addrspace(4) %ptr, i64 256 + %1 = load i32, ptr addrspace(4) %0 + store i32 %1, ptr addrspace(1) undef ret void } @@ -51,11 +51,11 @@ entry: ; XSI: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[{{[0-9]:[0-9]}}], 0xb ; encoding: [0x0b ; TODO: Add VI checks ; XGCN: s_endpgm -define amdgpu_kernel void @smrd3(i32 addrspace(4)* %ptr) { +define amdgpu_kernel void @smrd3(ptr addrspace(4) %ptr) { entry: - %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 4294967296 ; 2 ^ 32 - %1 = load i32, i32 addrspace(4)* %0 - store i32 %1, i32 addrspace(1)* undef + %0 = getelementptr i32, ptr addrspace(4) %ptr, i64 4294967296 ; 2 ^ 32 + %1 = load i32, ptr addrspace(4) %0 + store i32 %1, ptr addrspace(1) undef ret void } @@ -67,11 +67,11 @@ entry: ; VI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0xffffc ; GFX9_10: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc ; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]] -define amdgpu_kernel void @smrd4(i32 addrspace(4)* %ptr) { +define amdgpu_kernel void @smrd4(ptr addrspace(4) %ptr) { entry: - %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 262143 - %1 = load i32, i32 addrspace(4)* %0 - store i32 %1, i32 addrspace(1)* undef + %0 = getelementptr i32, ptr addrspace(4) %ptr, i64 262143 + %1 = load i32, ptr addrspace(4) %0 + store i32 %1, ptr addrspace(1) undef ret void } @@ -81,11 +81,11 @@ entry: ; SIVIGFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], [[OFFSET]] ; CI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x40000 ; GCN: s_endpgm -define amdgpu_kernel void @smrd5(i32 addrspace(4)* %ptr) { +define amdgpu_kernel void @smrd5(ptr addrspace(4) %ptr) { entry: - %0 = getelementptr i32, i32 addrspace(4)* %ptr, i64 262144 - %1 = load i32, i32 addrspace(4)* %0 - store i32 %1, i32 addrspace(1)* undef + %0 = getelementptr i32, ptr addrspace(4) %ptr, i64 262144 + %1 = load i32, ptr addrspace(4) %0 + store i32 %1, ptr addrspace(1) undef ret void } @@ -94,11 +94,11 @@ entry: ; SICIVI: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, -4 ; SICIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0 ; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], -0x4 -define amdgpu_kernel void @smrd6(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 { +define amdgpu_kernel void @smrd6(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 { entry: - %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 -1 - %tmp1 = load i32, i32 addrspace(4)* %tmp - store i32 %tmp1, i32 addrspace(1)* %out + %tmp = getelementptr i32, ptr addrspace(4) %ptr, i64 -1 + %tmp1 = load i32, ptr addrspace(4) %tmp + store i32 %tmp1, ptr addrspace(1) %out ret void } @@ -107,10 +107,10 @@ entry: ; GCN: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, 0xffe00000 ; SICIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0 ; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0 -define amdgpu_kernel void @smrd7(i32 addrspace(1)* %out, i32 addrspace(4)* %ptr) #0 { +define amdgpu_kernel void @smrd7(ptr 
addrspace(1) %out, ptr addrspace(4) %ptr) #0 { entry: - %tmp = getelementptr i32, i32 addrspace(4)* %ptr, i64 -524288 - %tmp1 = load i32, i32 addrspace(4)* %tmp - store i32 %tmp1, i32 addrspace(1)* %out + %tmp = getelementptr i32, ptr addrspace(4) %ptr, i64 -524288 + %tmp1 = load i32, ptr addrspace(4) %tmp + store i32 %tmp1, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll index 63b4bf25e5bf1..b9c7cc3aa3f01 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll @@ -7,7 +7,7 @@ ; FIXME: ; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s -define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { +define amdgpu_kernel void @store_lds_v4i32(ptr addrspace(3) %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -60,11 +60,11 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3 ; GFX11-NEXT: v_mov_b32_e32 v4, s0 ; GFX11-NEXT: ds_store_b128 v4, v[0:3] ; GFX11-NEXT: s_endpgm - store <4 x i32> %x, <4 x i32> addrspace(3)* %out + store <4 x i32> %x, ptr addrspace(3) %out ret void } -define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { +define amdgpu_kernel void @store_lds_v4i32_align1(ptr addrspace(3) %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -286,11 +286,11 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, ; GFX11-NEXT: ds_store_b8 v1, v7 offset:14 ; GFX11-NEXT: ds_store_b8 v1, v8 offset:15 ; GFX11-NEXT: s_endpgm - store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 1 + store <4 x i32> %x, ptr addrspace(3) %out, align 1 ret void } -define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { +define amdgpu_kernel void @store_lds_v4i32_align2(ptr addrspace(3) %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -401,11 +401,11 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, ; GFX11-NEXT: ds_store_b16 v1, v4 offset:12 ; GFX11-NEXT: ds_store_b16 v1, v8 offset:14 ; GFX11-NEXT: s_endpgm - store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 2 + store <4 x i32> %x, ptr addrspace(3) %out, align 2 ret void } -define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { +define amdgpu_kernel void @store_lds_v4i32_align4(ptr addrspace(3) %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align4: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -462,11 +462,11 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, ; GFX11-NEXT: ds_store_2addr_b32 v1, v0, v2 offset1:1 ; GFX11-NEXT: ds_store_2addr_b32 v1, v3, v4 offset0:2 offset1:3 ; GFX11-NEXT: s_endpgm - store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 4 + store <4 x i32> %x, ptr addrspace(3) %out, align 4 ret void } -define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { +define amdgpu_kernel void @store_lds_v4i32_align8(ptr addrspace(3) %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: 
s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -520,11 +520,11 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, ; GFX11-NEXT: v_mov_b32_e32 v4, s0 ; GFX11-NEXT: ds_store_2addr_b64 v4, v[0:1], v[2:3] offset1:1 ; GFX11-NEXT: s_endpgm - store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 8 + store <4 x i32> %x, ptr addrspace(3) %out, align 8 ret void } -define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, <4 x i32> %x) { +define amdgpu_kernel void @store_lds_v4i32_align16(ptr addrspace(3) %out, <4 x i32> %x) { ; GFX9-LABEL: store_lds_v4i32_align16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -577,6 +577,6 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, ; GFX11-NEXT: v_mov_b32_e32 v4, s0 ; GFX11-NEXT: ds_store_b128 v4, v[0:3] ; GFX11-NEXT: s_endpgm - store <4 x i32> %x, <4 x i32> addrspace(3)* %out, align 16 + store <4 x i32> %x, ptr addrspace(3) %out, align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll index f2fb1371a8fb1..cfca355e61517 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll @@ -7,7 +7,7 @@ ; FIXME: ; XUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -verify-machineinstrs < %s | FileCheck --check-prefix=GFX6 %s -define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { +define amdgpu_kernel void @store_lds_v3i32(ptr addrspace(3) %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10 @@ -56,11 +56,11 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3 ; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s0 ; GFX11-NEXT: ds_store_b96 v3, v[0:2] ; GFX11-NEXT: s_endpgm - store <3 x i32> %x, <3 x i32> addrspace(3)* %out + store <3 x i32> %x, ptr addrspace(3) %out ret void } -define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { +define amdgpu_kernel void @store_lds_v3i32_align1(ptr addrspace(3) %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align1: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -233,11 +233,11 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, ; GFX11-NEXT: ds_store_b8 v1, v6 offset:10 ; GFX11-NEXT: ds_store_b8 v1, v12 offset:11 ; GFX11-NEXT: s_endpgm - store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 1 + store <3 x i32> %x, ptr addrspace(3) %out, align 1 ret void } -define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { +define amdgpu_kernel void @store_lds_v3i32_align2(ptr addrspace(3) %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align2: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -329,11 +329,11 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, ; GFX11-NEXT: ds_store_b16 v1, v4 offset:8 ; GFX11-NEXT: ds_store_b16 v1, v6 offset:10 ; GFX11-NEXT: s_endpgm - store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 2 + store <3 x i32> %x, ptr addrspace(3) %out, align 2 ret void } -define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { +define amdgpu_kernel void @store_lds_v3i32_align4(ptr addrspace(3) %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align4: ; GFX9: ; %bb.0: ; GFX9-NEXT: 
s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -386,11 +386,11 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, ; GFX11-NEXT: ds_store_2addr_b32 v1, v0, v2 offset1:1 ; GFX11-NEXT: ds_store_b32 v1, v3 offset:8 ; GFX11-NEXT: s_endpgm - store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 4 + store <3 x i32> %x, ptr addrspace(3) %out, align 4 ret void } -define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { +define amdgpu_kernel void @store_lds_v3i32_align8(ptr addrspace(3) %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x10 @@ -443,11 +443,11 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, ; GFX11-NEXT: ds_store_2addr_b32 v1, v0, v2 offset1:1 ; GFX11-NEXT: ds_store_b32 v1, v3 offset:8 ; GFX11-NEXT: s_endpgm - store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 8 + store <3 x i32> %x, ptr addrspace(3) %out, align 8 ret void } -define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, <3 x i32> %x) { +define amdgpu_kernel void @store_lds_v3i32_align16(ptr addrspace(3) %out, <3 x i32> %x) { ; GFX9-LABEL: store_lds_v3i32_align16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x10 @@ -496,6 +496,6 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, ; GFX11-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s0 ; GFX11-NEXT: ds_store_b96 v3, v[0:2] ; GFX11-NEXT: s_endpgm - store <3 x i32> %x, <3 x i32> addrspace(3)* %out, align 16 + store <3 x i32> %x, ptr addrspace(3) %out, align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll index 6af44319a502f..9d8f051b83036 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -amdgpu-bypass-slow-div=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s -define amdgpu_kernel void @udivrem_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) { +define amdgpu_kernel void @udivrem_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i32 %x, i32 %y) { ; GFX8-LABEL: udivrem_i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 @@ -103,13 +103,13 @@ define amdgpu_kernel void @udivrem_i32(i32 addrspace(1)* %out0, i32 addrspace(1) ; GFX10-NEXT: global_store_dword v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = udiv i32 %x, %y - store i32 %div, i32 addrspace(1)* %out0 + store i32 %div, ptr addrspace(1) %out0 %rem = urem i32 %x, %y - store i32 %rem, i32 addrspace(1)* %out1 + store i32 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1)* %out1, i64 %x, i64 %y) { +define amdgpu_kernel void @udivrem_i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i64 %x, i64 %y) { ; GFX8-LABEL: udivrem_i64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 @@ -519,13 +519,13 @@ define amdgpu_kernel void @udivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1) ; GFX10-NEXT: global_store_dwordx2 v7, v[2:3], s[6:7] ; GFX10-NEXT: s_endpgm %div = udiv i64 %x, %y - 
store i64 %div, i64 addrspace(1)* %out0 + store i64 %div, ptr addrspace(1) %out0 %rem = urem i64 %x, %y - store i64 %rem, i64 addrspace(1)* %out1 + store i64 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32> addrspace(1)* %out1, <2 x i32> %x, <2 x i32> %y) { +define amdgpu_kernel void @udivrem_v2i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i32> %x, <2 x i32> %y) { ; GFX8-LABEL: udivrem_v2i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x0 @@ -682,13 +682,13 @@ define amdgpu_kernel void @udivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32 ; GFX10-NEXT: global_store_dwordx2 v8, v[2:3], s[6:7] ; GFX10-NEXT: s_endpgm %div = udiv <2 x i32> %x, %y - store <2 x i32> %div, <2 x i32> addrspace(1)* %out0 + store <2 x i32> %div, ptr addrspace(1) %out0 %rem = urem <2 x i32> %x, %y - store <2 x i32> %rem, <2 x i32> addrspace(1)* %out1 + store <2 x i32> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32> addrspace(1)* %out1, <4 x i32> %x, <4 x i32> %y) { +define amdgpu_kernel void @udivrem_v4i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <4 x i32> %x, <4 x i32> %y) { ; GFX8-LABEL: udivrem_v4i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10 @@ -976,13 +976,13 @@ define amdgpu_kernel void @udivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32 ; GFX10-NEXT: global_store_dwordx4 v8, v[4:7], s[6:7] ; GFX10-NEXT: s_endpgm %div = udiv <4 x i32> %x, %y - store <4 x i32> %div, <4 x i32> addrspace(1)* %out0 + store <4 x i32> %div, ptr addrspace(1) %out0 %rem = urem <4 x i32> %x, %y - store <4 x i32> %rem, <4 x i32> addrspace(1)* %out1 + store <4 x i32> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64> addrspace(1)* %out1, <2 x i64> %x, <2 x i64> %y) { +define amdgpu_kernel void @udivrem_v2i64(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i64> %x, <2 x i64> %y) { ; GFX8-LABEL: udivrem_v2i64: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x10 @@ -1776,13 +1776,13 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64 ; GFX10-NEXT: global_store_dwordx4 v9, v[4:7], s[6:7] ; GFX10-NEXT: s_endpgm %div = udiv <2 x i64> %x, %y - store <2 x i64> %div, <2 x i64> addrspace(1)* %out0 + store <2 x i64> %div, ptr addrspace(1) %out0 %rem = urem <2 x i64> %x, %y - store <2 x i64> %rem, <2 x i64> addrspace(1)* %out1 + store <2 x i64> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out1, i8 %x, i8 %y) { +define amdgpu_kernel void @udiv_i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i8 %x, i8 %y) { ; GFX8-LABEL: udiv_i8: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s6, s[4:5], 0x10 @@ -1888,13 +1888,13 @@ define amdgpu_kernel void @udiv_i8(i8 addrspace(1)* %out0, i8 addrspace(1)* %out ; GFX10-NEXT: global_store_byte v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = udiv i8 %x, %y - store i8 %div, i8 addrspace(1)* %out0 + store i8 %div, ptr addrspace(1) %out0 %rem = urem i8 %x, %y - store i8 %rem, i8 addrspace(1)* %out1 + store i8 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> addrspace(1)* %out1, <2 x i8> %x, <2 x i8> %y) { +define amdgpu_kernel void @udivrem_v2i8(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i8> %x, <2 x i8> %y) { ; GFX8-LABEL: udivrem_v2i8: ; 
GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s0, s[4:5], 0x10 @@ -2085,13 +2085,13 @@ define amdgpu_kernel void @udivrem_v2i8(<2 x i8> addrspace(1)* %out0, <2 x i8> a ; GFX10-NEXT: global_store_short v1, v2, s[6:7] ; GFX10-NEXT: s_endpgm %div = udiv <2 x i8> %x, %y - store <2 x i8> %div, <2 x i8> addrspace(1)* %out0 + store <2 x i8> %div, ptr addrspace(1) %out0 %rem = urem <2 x i8> %x, %y - store <2 x i8> %rem, <2 x i8> addrspace(1)* %out1 + store <2 x i8> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udiv_i16(i16 addrspace(1)* %out0, i16 addrspace(1)* %out1, i16 %x, i16 %y) { +define amdgpu_kernel void @udiv_i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i16 %x, i16 %y) { ; GFX8-LABEL: udiv_i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s6, s[4:5], 0x10 @@ -2197,13 +2197,13 @@ define amdgpu_kernel void @udiv_i16(i16 addrspace(1)* %out0, i16 addrspace(1)* % ; GFX10-NEXT: global_store_short v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = udiv i16 %x, %y - store i16 %div, i16 addrspace(1)* %out0 + store i16 %div, ptr addrspace(1) %out0 %rem = urem i16 %x, %y - store i16 %rem, i16 addrspace(1)* %out1 + store i16 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> %x, <2 x i16> %y) { +define amdgpu_kernel void @udivrem_v2i16(ptr addrspace(1) %out0, ptr addrspace(1) %out1, <2 x i16> %x, <2 x i16> %y) { ; GFX8-LABEL: udivrem_v2i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x10 @@ -2388,13 +2388,13 @@ define amdgpu_kernel void @udivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16 ; GFX10-NEXT: global_store_dword v1, v2, s[6:7] ; GFX10-NEXT: s_endpgm %div = udiv <2 x i16> %x, %y - store <2 x i16> %div, <2 x i16> addrspace(1)* %out0 + store <2 x i16> %div, ptr addrspace(1) %out0 %rem = urem <2 x i16> %x, %y - store <2 x i16> %rem, <2 x i16> addrspace(1)* %out1 + store <2 x i16> %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* %out1, i3 %x, i3 %y) { +define amdgpu_kernel void @udivrem_i3(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i3 %x, i3 %y) { ; GFX8-LABEL: udivrem_i3: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dword s6, s[4:5], 0x10 @@ -2506,13 +2506,13 @@ define amdgpu_kernel void @udivrem_i3(i3 addrspace(1)* %out0, i3 addrspace(1)* % ; GFX10-NEXT: global_store_byte v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = udiv i3 %x, %y - store i3 %div, i3 addrspace(1)* %out0 + store i3 %div, ptr addrspace(1) %out0 %rem = urem i3 %x, %y - store i3 %rem, i3 addrspace(1)* %out1 + store i3 %rem, ptr addrspace(1) %out1 ret void } -define amdgpu_kernel void @udivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1)* %out1, i27 %x, i27 %y) { +define amdgpu_kernel void @udivrem_i27(ptr addrspace(1) %out0, ptr addrspace(1) %out1, i27 %x, i27 %y) { ; GFX8-LABEL: udivrem_i27: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x10 @@ -2624,8 +2624,8 @@ define amdgpu_kernel void @udivrem_i27(i27 addrspace(1)* %out0, i27 addrspace(1) ; GFX10-NEXT: global_store_dword v2, v1, s[2:3] ; GFX10-NEXT: s_endpgm %div = udiv i27 %x, %y - store i27 %div, i27 addrspace(1)* %out0 + store i27 %div, ptr addrspace(1) %out0 %rem = urem i27 %x, %y - store i27 %rem, i27 addrspace(1)* %out1 + store i27 %rem, ptr addrspace(1) %out1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll index f81d7128952dc..82fa56f15db35 
100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll @@ -3,7 +3,7 @@ ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX10 %s -define amdgpu_kernel void @constant_load_i8_align4(i8 addrspace (1)* %out, i8 addrspace(4)* %in) #0 { +define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace (1) %out, ptr addrspace(4) %in) #0 { ; GFX8-LABEL: constant_load_i8_align4: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -37,12 +37,12 @@ define amdgpu_kernel void @constant_load_i8_align4(i8 addrspace (1)* %out, i8 ad ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: global_store_byte v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm - %ld = load i8, i8 addrspace(4)* %in, align 4 - store i8 %ld, i8 addrspace(1)* %out, align 4 + %ld = load i8, ptr addrspace(4) %in, align 4 + store i8 %ld, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @constant_load_i16_align4(i16 addrspace (1)* %out, i16 addrspace(4)* %in) #0 { +define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace (1) %out, ptr addrspace(4) %in) #0 { ; GFX8-LABEL: constant_load_i16_align4: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -76,12 +76,12 @@ define amdgpu_kernel void @constant_load_i16_align4(i16 addrspace (1)* %out, i16 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: global_store_short v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm - %ld = load i16, i16 addrspace(4)* %in, align 4 - store i16 %ld, i16 addrspace(1)* %out, align 4 + %ld = load i16, ptr addrspace(4) %in, align 4 + store i16 %ld, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sextload_i8_to_i32_align4(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: sextload_i8_to_i32_align4: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -118,13 +118,13 @@ define amdgpu_kernel void @sextload_i8_to_i32_align4(i32 addrspace(1)* %out, i8 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %in, align 4 + %load = load i8, ptr addrspace(1) %in, align 4 %sext = sext i8 %load to i32 - store i32 %sext, i32 addrspace(1)* %out, align 4 + store i32 %sext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @sextload_i16_to_i32_align4(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { +define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: sextload_i16_to_i32_align4: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -161,13 +161,13 @@ define amdgpu_kernel void @sextload_i16_to_i32_align4(i32 addrspace(1)* %out, i1 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm - %load = load i16, i16 addrspace(1)* %in, align 4 + %load = load i16, ptr addrspace(1) %in, align 4 %sext = sext i16 %load to i32 - store i32 %sext, i32 addrspace(1)* %out, align 4 + store i32 %sext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @zextload_i8_to_i32_align4(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) 
%out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: zextload_i8_to_i32_align4: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -204,13 +204,13 @@ define amdgpu_kernel void @zextload_i8_to_i32_align4(i32 addrspace(1)* %out, i8 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %in, align 4 + %load = load i8, ptr addrspace(1) %in, align 4 %zext = zext i8 %load to i32 - store i32 %zext, i32 addrspace(1)* %out, align 4 + store i32 %zext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @zextload_i16_to_i32_align4(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { +define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: zextload_i16_to_i32_align4: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -247,13 +247,13 @@ define amdgpu_kernel void @zextload_i16_to_i32_align4(i32 addrspace(1)* %out, i1 ; GFX10-NEXT: v_mov_b32_e32 v0, s2 ; GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GFX10-NEXT: s_endpgm - %load = load i16, i16 addrspace(1)* %in, align 4 + %load = load i16, ptr addrspace(1) %in, align 4 %zext = zext i16 %load to i32 - store i32 %zext, i32 addrspace(1)* %out, align 4 + store i32 %zext, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: constant_load_i8_align2: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -286,12 +286,12 @@ define amdgpu_kernel void @constant_load_i8_align2(i8 addrspace(1)* %out, i8 add ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_byte v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %in, align 2 - store i8 %load, i8 addrspace(1)* %out, align 2 + %load = load i8, ptr addrspace(1) %in, align 2 + store i8 %load, ptr addrspace(1) %out, align 2 ret void } -define amdgpu_kernel void @constant_load_i16_align2(i16 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { +define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: constant_load_i16_align2: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -324,12 +324,12 @@ define amdgpu_kernel void @constant_load_i16_align2(i16 addrspace(1)* %out, i16 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: global_store_short v0, v1, s[0:1] ; GFX10-NEXT: s_endpgm - %load = load i16, i16 addrspace(1)* %in, align 2 - store i16 %load, i16 addrspace(1)* %out, align 2 + %load = load i16, ptr addrspace(1) %in, align 2 + store i16 %load, ptr addrspace(1) %out, align 2 ret void } -define amdgpu_kernel void @constant_sextload_i8_align2(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: constant_sextload_i8_align2: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -370,13 +370,13 @@ define amdgpu_kernel void @constant_sextload_i8_align2(i32 addrspace(1)* %out, i ; GFX10-NEXT: global_store_short v0, v1, s[0:1] ; GFX10-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:2 ; GFX10-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %in, align 2 + %load = load i8, ptr addrspace(1) %in, align 2 %sextload = sext i8 %load to i32 - store i32 %sextload, i32 addrspace(1)* %out, 
align 2 + store i32 %sextload, ptr addrspace(1) %out, align 2 ret void } -define amdgpu_kernel void @constant_zextload_i8_align2(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { +define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { ; GFX8-LABEL: constant_zextload_i8_align2: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -417,9 +417,9 @@ define amdgpu_kernel void @constant_zextload_i8_align2(i32 addrspace(1)* %out, i ; GFX10-NEXT: global_store_short v0, v1, s[0:1] ; GFX10-NEXT: global_store_short_d16_hi v0, v1, s[0:1] offset:2 ; GFX10-NEXT: s_endpgm - %load = load i8, i8 addrspace(1)* %in, align 2 + %load = load i8, ptr addrspace(1) %in, align 2 %zextload = zext i8 %load to i32 - store i32 %zextload, i32 addrspace(1)* %out, align 2 + store i32 %zextload, ptr addrspace(1) %out, align 2 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll index fe27f45b1585e..e9b94db905267 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll @@ -5,7 +5,7 @@ ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck --check-prefix=GFX11 %s -define i32 @zextload_global_i1_to_i32(i1 addrspace(1)* %ptr) { +define i32 @zextload_global_i1_to_i32(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i1_to_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -50,12 +50,12 @@ define i32 @zextload_global_i1_to_i32(i1 addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i1, i1 addrspace(1)* %ptr + %load = load i1, ptr addrspace(1) %ptr %ext = zext i1 %load to i32 ret i32 %ext } -define i32 @zextload_global_i8_to_i32(i8 addrspace(1)* %ptr) { +define i32 @zextload_global_i8_to_i32(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i8_to_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -95,12 +95,12 @@ define i32 @zextload_global_i8_to_i32(i8 addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_u8 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i8, i8 addrspace(1)* %ptr + %load = load i8, ptr addrspace(1) %ptr %ext = zext i8 %load to i32 ret i32 %ext } -define i32 @zextload_global_i16_to_i32(i16 addrspace(1)* %ptr) { +define i32 @zextload_global_i16_to_i32(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i16_to_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -140,12 +140,12 @@ define i32 @zextload_global_i16_to_i32(i16 addrspace(1)* %ptr) { ; GFX11-NEXT: global_load_u16 v0, v[0:1], off ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i16, i16 addrspace(1)* %ptr + %load = load i16, ptr addrspace(1) %ptr %ext = zext i16 %load to i32 ret i32 %ext } -define i64 @zextload_global_i1_to_i64(i1 addrspace(1)* %ptr) { +define i64 @zextload_global_i1_to_i64(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i1_to_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -194,12 +194,12 @@ define i64 @zextload_global_i1_to_i64(i1 addrspace(1)* %ptr) { ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 1, v0 ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i1, i1 addrspace(1)* %ptr + 
%load = load i1, ptr addrspace(1) %ptr %ext = zext i1 %load to i64 ret i64 %ext } -define i64 @zextload_global_i8_to_i64(i8 addrspace(1)* %ptr) { +define i64 @zextload_global_i8_to_i64(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i8_to_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -244,12 +244,12 @@ define i64 @zextload_global_i8_to_i64(i8 addrspace(1)* %ptr) { ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i8, i8 addrspace(1)* %ptr + %load = load i8, ptr addrspace(1) %ptr %ext = zext i8 %load to i64 ret i64 %ext } -define i64 @zextload_global_i16_to_i64(i16 addrspace(1)* %ptr) { +define i64 @zextload_global_i16_to_i64(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i16_to_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -294,12 +294,12 @@ define i64 @zextload_global_i16_to_i64(i16 addrspace(1)* %ptr) { ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i16, i16 addrspace(1)* %ptr + %load = load i16, ptr addrspace(1) %ptr %ext = zext i16 %load to i64 ret i64 %ext } -define i64 @zextload_global_i32_to_i64(i32 addrspace(1)* %ptr) { +define i64 @zextload_global_i32_to_i64(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i32_to_i64: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -344,12 +344,12 @@ define i64 @zextload_global_i32_to_i64(i32 addrspace(1)* %ptr) { ; GFX11-NEXT: v_mov_b32_e32 v1, 0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i32, i32 addrspace(1)* %ptr + %load = load i32, ptr addrspace(1) %ptr %ext = zext i32 %load to i64 ret i64 %ext } -define i96 @zextload_global_i32_to_i96(i32 addrspace(1)* %ptr) { +define i96 @zextload_global_i32_to_i96(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i32_to_i96: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -398,12 +398,12 @@ define i96 @zextload_global_i32_to_i96(i32 addrspace(1)* %ptr) { ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i32, i32 addrspace(1)* %ptr + %load = load i32, ptr addrspace(1) %ptr %ext = zext i32 %load to i96 ret i96 %ext } -define i128 @zextload_global_i32_to_i128(i32 addrspace(1)* %ptr) { +define i128 @zextload_global_i32_to_i128(ptr addrspace(1) %ptr) { ; GFX9-LABEL: zextload_global_i32_to_i128: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -457,7 +457,7 @@ define i128 @zextload_global_i32_to_i128(i32 addrspace(1)* %ptr) { ; GFX11-NEXT: v_mov_b32_e32 v3, 0 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] - %load = load i32, i32 addrspace(1)* %ptr + %load = load i32, ptr addrspace(1) %ptr %ext = zext i32 %load to i128 ret i128 %ext }