diff --git a/llvm/test/Transforms/InstCombine/AArch64/VectorUtils_heuristics.ll b/llvm/test/Transforms/InstCombine/AArch64/VectorUtils_heuristics.ll
index 5ea50e36236e4..b98036693ba07 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/VectorUtils_heuristics.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/VectorUtils_heuristics.ll
@@ -10,12 +10,12 @@ target triple = "aarch64-unknown-linux-gnu"
 ; CHECK: unreachable
 define void @novel_algorithm() {
 entry:
-  %a = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i8> undef)
+  %a = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr undef, i32 1, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i8> undef)
   %b = add <vscale x 16 x i8> undef, %a
-  call void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8> %b, <vscale x 16 x i8>* undef, i32 1, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer))
+  call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> %b, ptr undef, i32 1, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> undef, i1 true, i32 0), <vscale x 16 x i1> undef, <vscale x 16 x i32> zeroinitializer))
   unreachable
 }

-declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0nxv16i8(<vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)

-declare void @llvm.masked.store.nxv16i8.p0nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>*, i32 immarg, <vscale x 16 x i1>)
+declare void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8>, ptr, i32 immarg, <vscale x 16 x i1>)
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-gatherscatter.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-gatherscatter.ll
index 2f665d5d6610f..2a73f709e989b 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-gatherscatter.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-gatherscatter.ll
@@ -7,38 +7,36 @@ target triple = "aarch64-unknown-linux-gnu"
 ;; Gathers.
 ;;

-define <vscale x 2 x double> @test_ld1_gather_index_nxv2f64_stride1(<vscale x 2 x i1> %pred, double* %x, i64 %base) #0 {
+define <vscale x 2 x double> @test_ld1_gather_index_nxv2f64_stride1(<vscale x 2 x i1> %pred, ptr %x, i64 %base) #0 {
 ; CHECK-LABEL: @test_ld1_gather_index_nxv2f64_stride1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[BASE:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[TMP1]] to <vscale x 2 x double>*
-; CHECK-NEXT:    [[LD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP2]], i32 1, <vscale x 2 x i1> [[PRED:%.*]], <vscale x 2 x double> zeroinitializer)
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[X:%.*]], i64 [[BASE:%.*]]
+; CHECK-NEXT:    [[LD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[PRED:%.*]], <vscale x 2 x double> zeroinitializer)
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[LD]]
 ;
   %idx = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 1)
-  %ld = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pred, double* %x, <vscale x 2 x i64> %idx)
+  %ld = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pred, ptr %x, <vscale x 2 x i64> %idx)
   ret <vscale x 2 x double> %ld
 }

-define <vscale x 2 x double> @test_ld1_gather_index_nxv2f64_stride2_negtest(<vscale x 2 x i1> %pred, double* %x, i64 %base) #0 {
+define <vscale x 2 x double> @test_ld1_gather_index_nxv2f64_stride2_negtest(<vscale x 2 x i1> %pred, ptr %x, i64 %base) #0 {
 ; CHECK-LABEL: @test_ld1_gather_index_nxv2f64_stride2_negtest(
 ; CHECK-NEXT:    [[IDX:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 [[BASE:%.*]], i64 2)
-; CHECK-NEXT:    [[LD:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> [[PRED:%.*]], double* [[X:%.*]], <vscale x 2 x i64> [[IDX]])
+; CHECK-NEXT:    [[LD:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> [[PRED:%.*]], ptr [[X:%.*]], <vscale x 2 x i64> [[IDX]])
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[LD]]
 ;
   %idx = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 2)
-  %ld = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pred, double* %x, <vscale x 2 x i64> %idx)
+  %ld = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pred, ptr %x, <vscale x 2 x i64> %idx)
   ret <vscale x 2 x double> %ld
 }

-define <vscale x 2 x double> @test_ld1_gather_index_nxv2f64_stride1_align8(<vscale x 2 x i1> %pred, double* align 8 %x, i64 %base) #0 {
+define <vscale x 2 x double> @test_ld1_gather_index_nxv2f64_stride1_align8(<vscale x 2 x i1> %pred, ptr align 8 %x, i64 %base) #0 {
 ; CHECK-LABEL: @test_ld1_gather_index_nxv2f64_stride1_align8(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[BASE:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[TMP1]] to <vscale x 2 x double>*
-; CHECK-NEXT:    [[LD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0nxv2f64(<vscale x 2 x double>* [[TMP2]], i32 8, <vscale x 2 x i1> [[PRED:%.*]], <vscale x 2 x double> zeroinitializer)
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[X:%.*]], i64 [[BASE:%.*]]
+; CHECK-NEXT:    [[LD:%.*]] = call <vscale x 2 x double> @llvm.masked.load.nxv2f64.p0(ptr [[TMP1]], i32 8, <vscale x 2 x i1> [[PRED:%.*]], <vscale x 2 x double> zeroinitializer)
 ; CHECK-NEXT:    ret <vscale x 2 x double> [[LD]]
 ;
   %idx = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 1)
-  %ld = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pred, double* %x, <vscale x 2 x i64> %idx)
+  %ld = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pred, ptr %x, <vscale x 2 x i64> %idx)
   ret <vscale x 2 x double> %ld
 }

@@ -46,43 +44,41 @@ define <vscale x 2 x double> @test_ld1_gather_index_nxv2f64_stride1_align8(<vsc
 ;; Scatters.
 ;;

-define void @test_st1_scatter_index_nxv2f64_stride1(<vscale x 2 x i1> %pred, double* %x, i64 %base, <vscale x 2 x double> %val) #0 {
+define void @test_st1_scatter_index_nxv2f64_stride1(<vscale x 2 x i1> %pred, ptr %x, i64 %base, <vscale x 2 x double> %val) #0 {
 ; CHECK-LABEL: @test_st1_scatter_index_nxv2f64_stride1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[BASE:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[TMP1]] to <vscale x 2 x double>*
-; CHECK-NEXT:    call void @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> [[VAL:%.*]], <vscale x 2 x double>* [[TMP2]], i32 1, <vscale x 2 x i1> [[PRED:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[X:%.*]], i64 [[BASE:%.*]]
+; CHECK-NEXT:    call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[VAL:%.*]], ptr [[TMP1]], i32 1, <vscale x 2 x i1> [[PRED:%.*]])
 ; CHECK-NEXT:    ret void
 ;
   %idx = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 1)
-  tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x i1> %pred, double* %x, <vscale x 2 x i64> %idx)
+  tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x i1> %pred, ptr %x, <vscale x 2 x i64> %idx)
   ret void
 }

-define void @test_st1_scatter_index_nxv2f64_stride2_negtest(<vscale x 2 x i1> %pred, double* %x, i64 %base, <vscale x 2 x double> %val) #0 {
+define void @test_st1_scatter_index_nxv2f64_stride2_negtest(<vscale x 2 x i1> %pred, ptr %x, i64 %base, <vscale x 2 x double> %val) #0 {
 ; CHECK-LABEL: @test_st1_scatter_index_nxv2f64_stride2_negtest(
 ; CHECK-NEXT:    [[IDX:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 [[BASE:%.*]], i64 2)
-; CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> [[VAL:%.*]], <vscale x 2 x i1> [[PRED:%.*]], double* [[X:%.*]], <vscale x 2 x i64> [[IDX]])
+; CHECK-NEXT:    tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> [[VAL:%.*]], <vscale x 2 x i1> [[PRED:%.*]], ptr [[X:%.*]], <vscale x 2 x i64> [[IDX]])
 ; CHECK-NEXT:    ret void
 ;
   %idx = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 2)
-  tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x i1> %pred, double* %x, <vscale x 2 x i64> %idx)
+  tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x i1> %pred, ptr %x, <vscale x 2 x i64> %idx)
   ret void
 }

-define void @test_st1_scatter_index_nxv2f64_stride1_align8(<vscale x 2 x i1> %pred, double* align 8 %x, i64 %base, <vscale x 2 x double> %val) #0 {
+define void @test_st1_scatter_index_nxv2f64_stride1_align8(<vscale x 2 x i1> %pred, ptr align 8 %x, i64 %base, <vscale x 2 x double> %val) #0 {
 ; CHECK-LABEL: @test_st1_scatter_index_nxv2f64_stride1_align8(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, double* [[X:%.*]], i64 [[BASE:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[TMP1]] to <vscale x 2 x double>*
-; CHECK-NEXT:    call void @llvm.masked.store.nxv2f64.p0nxv2f64(<vscale x 2 x double> [[VAL:%.*]], <vscale x 2 x double>* [[TMP2]], i32 8, <vscale x 2 x i1> [[PRED:%.*]])
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[X:%.*]], i64 [[BASE:%.*]]
+; CHECK-NEXT:    call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[VAL:%.*]], ptr [[TMP1]], i32 8, <vscale x 2 x i1> [[PRED:%.*]])
 ; CHECK-NEXT:    ret void
 ;
   %idx = tail call <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64 %base, i64 1)
-  tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x i1> %pred, double* %x, <vscale x 2 x i64> %idx)
+  tail call void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double> %val, <vscale x 2 x i1> %pred, ptr %x, <vscale x 2 x i64> %idx)
   ret void
 }

 declare <vscale x 2 x i64> @llvm.aarch64.sve.index.nxv2i64(i64, i64)
-declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i64>)
-declare void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*, <vscale x 2 x i64>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)
+declare void @llvm.aarch64.sve.st1.scatter.index.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, ptr, <vscale x 2 x i64>)

 attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
index d944a770d2881..c67662f872503 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-loadstore.ll
@@ -3,107 +3,99 @@

 target triple = "aarch64-unknown-linux-gnu"

-define <vscale x 4 x i32> @combine_ld1(i32* %ptr) #0 {
+define <vscale x 4 x i32> @combine_ld1(ptr %ptr) #0 {
 ; CHECK-LABEL: @combine_ld1(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
+; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 4 x i32>, ptr [[PTR:%.*]], align 16, !annotation !0
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 ;
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr), !annotation !0
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, ptr %ptr), !annotation !0
   ret <vscale x 4 x i32> %2
 }

-define <vscale x 4 x i32> @combine_ld1_casted_predicate(i32* %ptr) #0 {
+define <vscale x 4 x i32> @combine_ld1_casted_predicate(ptr %ptr) #0 {
 ; CHECK-LABEL: @combine_ld1_casted_predicate(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 4 x i32>, <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
+; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 4 x i32>, ptr [[PTR:%.*]], align 16, !annotation !0
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP2]]
 ;
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
   %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %2)
-  %4 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %3, i32* %ptr), !annotation !0
+  %4 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %3, ptr %ptr), !annotation !0
   ret <vscale x 4 x i32> %4
 }

-define <vscale x 4 x i32> @combine_ld1_masked(i32* %ptr) #0 {
+define <vscale x 4 x i32> @combine_ld1_masked(ptr %ptr) #0 {
 ; CHECK-LABEL: @combine_ld1_masked(
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0nxv4i32(<vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> zeroinitializer), !annotation !0
+; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[PTR:%.*]], i32 1, <vscale x 4 x i1> [[TMP1]], <vscale x 4 x i32> zeroinitializer), !annotation !0
 ; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP3]]
 ;
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
-  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, i32* %ptr), !annotation !0
+  %2 = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %1, ptr %ptr), !annotation !0
   ret <vscale x 4 x i32> %2
 }

-define <vscale x 8 x i16> @combine_ld1_masked_casted_predicate(i16* %ptr) #0 {
+define <vscale x 8 x i16> @combine_ld1_masked_casted_predicate(ptr %ptr) #0 {
 ; CHECK-LABEL: @combine_ld1_masked_casted_predicate(
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR:%.*]] to <vscale x 8 x i16>*
-; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0nxv8i16(<vscale x 8 x i16>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> zeroinitializer), !annotation !0
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr [[PTR:%.*]], i32 1, <vscale x 8 x i1> [[TMP3]], <vscale x 8 x i16> zeroinitializer), !annotation !0
 ; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP5]]
 ;
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
   %3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %2)
-  %4 = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %3, i16* %ptr), !annotation !0
+  %4 = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %3, ptr %ptr), !annotation !0
   ret <vscale x 8 x i16> %4
 }

-define void @combine_st1(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
+define void @combine_st1(<vscale x 4 x i32> %vec, ptr %ptr) #0 {
 ; CHECK-LABEL: @combine_st1(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
+; CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], align 16, !annotation !0
 ; CHECK-NEXT:    ret void
 ;
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr), !annotation !0
+  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, ptr %ptr), !annotation !0
   ret void
 }

-define void @combine_st1_casted_predicate(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
+define void @combine_st1_casted_predicate(<vscale x 4 x i32> %vec, ptr %ptr) #0 {
 ; CHECK-LABEL: @combine_st1_casted_predicate(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP1]], align 16, !annotation !0
+; CHECK-NEXT:    store <vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], align 16, !annotation !0
 ; CHECK-NEXT:    ret void
 ;
   %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %1)
   %3 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %2)
-  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %3, i32* %ptr), !annotation !0
+  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %3, ptr %ptr), !annotation !0
   ret void
 }

-define void @combine_st1_masked(<vscale x 4 x i32> %vec, i32* %ptr) #0 {
+define void @combine_st1_masked(<vscale x 4 x i32> %vec, ptr %ptr) #0 {
 ; CHECK-LABEL: @combine_st1_masked(
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[PTR:%.*]] to <vscale x 4 x i32>*
-; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0nxv4i32(<vscale x 4 x i32> [[VEC:%.*]], <vscale x 4 x i32>* [[TMP2]], i32 1, <vscale x 4 x i1> [[TMP1]]), !annotation !0
+; CHECK-NEXT:    call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[VEC:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 4 x i1> [[TMP1]]), !annotation !0
 ; CHECK-NEXT:    ret void
 ;
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 16)
-  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, i32* %ptr), !annotation !0
+  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %vec, <vscale x 4 x i1> %1, ptr %ptr), !annotation !0
   ret void
 }

-define void @combine_st1_masked_casted_predicate(<vscale x 8 x i16> %vec, i16* %ptr) #0 {
+define void @combine_st1_masked_casted_predicate(<vscale x 8 x i16> %vec, ptr %ptr) #0 {
 ; CHECK-LABEL: @combine_st1_masked_casted_predicate(
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
 ; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> [[TMP1]])
 ; CHECK-NEXT:    [[TMP3:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[PTR:%.*]] to <vscale x 8 x i16>*
-; CHECK-NEXT:    call void @llvm.masked.store.nxv8i16.p0nxv8i16(<vscale x 8 x i16> [[VEC:%.*]], <vscale x 8 x i16>* [[TMP4]], i32 1, <vscale x 8 x i1> [[TMP3]]), !annotation !0
+; CHECK-NEXT:    call void @llvm.masked.store.nxv8i16.p0(<vscale x 8 x i16> [[VEC:%.*]], ptr [[PTR:%.*]], i32 1, <vscale x 8 x i1> [[TMP3]]), !annotation !0
 ; CHECK-NEXT:    ret void
 ;
   %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %2 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
   %3 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %2)
-  call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> %vec, <vscale x 8 x i1> %3, i16* %ptr), !annotation !0
+  call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> %vec, <vscale x 8 x i1> %3, ptr %ptr), !annotation !0
   ret void
 }

@@ -113,15 +105,15 @@ declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale
 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)

-declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1>, i32*)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1>, i16*)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1>, ptr)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1>, ptr)

 declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
 declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)

-declare void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
-declare void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
+declare void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
+declare void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, ptr)

 attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics-gfx8.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics-gfx8.ll
index 01803097c7a8a..0cb9b4b84a556 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics-gfx8.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics-gfx8.ll
@@ -12,97 +12,97 @@ declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float,
 declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

-define amdgpu_kernel void @image_sample_a16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
+define amdgpu_kernel void @image_sample_a16_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
 ; CHECK-LABEL: @image_sample_a16_1d(
 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
+define amdgpu_kernel void @image_sample_a16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_2d(
 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
 ; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
+define amdgpu_kernel void @image_sample_a16_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
 ; CHECK-LABEL: @image_sample_a16_3d(
 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
 ; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
 ; CHECK-NEXT:    [[R32:%.*]] = fpext half [[R:%.*]] to float
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[R32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %r32 = fpext half %r to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
+define amdgpu_kernel void @image_sample_a16_cube(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
 ;
 ; CHECK-LABEL: @image_sample_a16_cube(
 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
 ; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
 ; CHECK-NEXT:    [[FACE32:%.*]] = fpext half [[FACE:%.*]] to float
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[FACE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %face32 = fpext half %face to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
+define amdgpu_kernel void @image_sample_a16_1darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_1darray(
 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
 ; CHECK-NEXT:    [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float [[S32]], float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %slice32 = fpext half %slice to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
+define amdgpu_kernel void @image_sample_a16_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_2darray(
 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
 ; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
 ; CHECK-NEXT:    [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float [[S32]], float [[T32]], float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %slice32 = fpext half %slice to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 91f7a8cae7437..88de608777c59 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -2508,23 +2508,23 @@ declare i32 @llvm.amdgcn.readfirstlane(i32)
 define amdgpu_kernel void @readfirstlane_constant(i32 %arg) {
 ; CHECK-LABEL: @readfirstlane_constant(
 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[ARG:%.*]])
-; CHECK-NEXT:    store volatile i32 [[VAR]], i32* undef, align 4
-; CHECK-NEXT:    store volatile i32 0, i32* undef, align 4
-; CHECK-NEXT:    store volatile i32 123, i32* undef, align 4
-; CHECK-NEXT:    store volatile i32 ptrtoint (i32* @gv to i32), i32* undef, align 4
-; CHECK-NEXT:    store volatile i32 undef, i32* undef, align 4
+; CHECK-NEXT:    store volatile i32 [[VAR]], ptr undef, align 4
+; CHECK-NEXT:    store volatile i32 0, ptr undef, align 4
+; CHECK-NEXT:    store volatile i32 123, ptr undef, align 4
+; CHECK-NEXT:    store volatile i32 ptrtoint (ptr @gv to i32), ptr undef, align 4
+; CHECK-NEXT:    store volatile i32 undef, ptr undef, align 4
 ; CHECK-NEXT:    ret void
 ;
   %var = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
   %zero = call i32 @llvm.amdgcn.readfirstlane(i32 0)
   %imm = call i32 @llvm.amdgcn.readfirstlane(i32 123)
-  %constexpr = call i32 @llvm.amdgcn.readfirstlane(i32 ptrtoint (i32* @gv to i32))
+  %constexpr = call i32 @llvm.amdgcn.readfirstlane(i32 ptrtoint (ptr @gv to i32))
   %undef = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
-  store volatile i32 %var, i32* undef
-  store volatile i32 %zero, i32* undef
-  store volatile i32 %imm, i32* undef
-  store volatile i32 %constexpr, i32* undef
-  store volatile i32 %undef, i32* undef
+  store volatile i32 %var, ptr undef
+  store volatile i32 %zero, ptr undef
+  store volatile i32 %imm, ptr undef
+  store volatile i32 %constexpr, ptr undef
+  store volatile i32 %undef, ptr undef
   ret void
 }

@@ -2594,23 +2594,23 @@ declare i32 @llvm.amdgcn.readlane(i32, i32)
 define amdgpu_kernel void @readlane_constant(i32 %arg, i32 %lane) {
 ; CHECK-LABEL: @readlane_constant(
 ; CHECK-NEXT:    [[VAR:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[ARG:%.*]], i32 7)
-; CHECK-NEXT:    store volatile i32 [[VAR]], i32* undef, align 4
-; CHECK-NEXT:    store volatile i32 0, i32* undef, align 4
-; CHECK-NEXT:    store volatile i32 123, i32* undef, align 4
-; CHECK-NEXT:    store volatile i32 ptrtoint (i32* @gv to i32), i32* undef, align 4
-; CHECK-NEXT:    store volatile i32 undef, i32* undef, align 4
+; CHECK-NEXT:    store volatile i32 [[VAR]], ptr undef, align 4
+; CHECK-NEXT:    store volatile i32 0, ptr undef, align 4
+; CHECK-NEXT:    store volatile i32 123, ptr undef, align 4
+; CHECK-NEXT:    store volatile i32 ptrtoint (ptr @gv to i32), ptr undef, align 4
+; CHECK-NEXT:    store volatile i32 undef, ptr undef, align 4
 ; CHECK-NEXT:    ret void
 ;
   %var = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 7)
   %zero = call i32 @llvm.amdgcn.readlane(i32 0, i32 %lane)
   %imm = call i32 @llvm.amdgcn.readlane(i32 123, i32 %lane)
-  %constexpr = call i32 @llvm.amdgcn.readlane(i32 ptrtoint (i32* @gv to i32), i32 %lane)
+  %constexpr = call i32 @llvm.amdgcn.readlane(i32 ptrtoint (ptr @gv to i32), i32 %lane)
   %undef = call i32 @llvm.amdgcn.readlane(i32 undef, i32 %lane)
-  store volatile i32 %var, i32* undef
-  store volatile i32 %zero, i32* undef
-  store volatile i32 %imm, i32* undef
-  store volatile i32 %constexpr, i32* undef
-  store volatile i32 %undef, i32* undef
+  store volatile i32 %var, ptr undef
+  store volatile i32 %zero, ptr undef
+  store volatile i32 %imm, ptr undef
+  store volatile i32 %constexpr, ptr undef
+  store volatile i32 %undef, ptr undef
   ret void
 }

@@ -2688,36 +2688,36 @@ bb1:

 declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)

-define amdgpu_kernel void @update_dpp_no_combine(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
+define amdgpu_kernel void @update_dpp_no_combine(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
 ; CHECK-LABEL: @update_dpp_no_combine(
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 [[IN1:%.*]], i32 [[IN2:%.*]], i32 1, i32 1, i32 1, i1 false)
-; CHECK-NEXT:    store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0)
-  store i32 %tmp0, i32 addrspace(1)* %out
+  store i32 %tmp0, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @update_dpp_drop_old(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
+define amdgpu_kernel void @update_dpp_drop_old(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
 ; CHECK-LABEL: @update_dpp_drop_old(
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[IN2:%.*]], i32 3, i32 15, i32 15, i1 true)
-; CHECK-NEXT:    store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 3, i32 15, i32 15, i1 1)
-  store i32 %tmp0, i32 addrspace(1)* %out
+  store i32 %tmp0, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @update_dpp_undef_old(i32 addrspace(1)* %out, i32 %in1) {
+define amdgpu_kernel void @update_dpp_undef_old(ptr addrspace(1) %out, i32 %in1) {
 ; CHECK-LABEL: @update_dpp_undef_old(
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 [[IN1:%.*]], i32 4, i32 15, i32 15, i1 true)
-; CHECK-NEXT:    store i32 [[TMP0]], i32 addrspace(1)* [[OUT:%.*]], align 4
+; CHECK-NEXT:    store i32 [[TMP0]], ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in1, i32 4, i32 15, i32 15, i1 1)
-  store i32 %tmp0, i32 addrspace(1)* %out
+  store i32 %tmp0, ptr addrspace(1) %out
   ret void
 }

@@ -2728,36 +2728,36 @@ define amdgpu_kernel void @update_dpp_undef_old(i32 addrspace(1)* %out, i32 %in1

 declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1 immarg, i1 immarg)

-define amdgpu_kernel void @permlane16(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
+define amdgpu_kernel void @permlane16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
 ; CHECK-LABEL: @permlane16(
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 false)
-; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
+; CHECK-NEXT:    store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %res = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
-  store i32 %res, i32 addrspace(1)* %out
+  store i32 %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @permlane16_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
+define amdgpu_kernel void @permlane16_bound_ctrl(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
 ; CHECK-LABEL: @permlane16_bound_ctrl(
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 true)
-; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
+; CHECK-NEXT:    store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %res = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true)
-  store i32 %res, i32 addrspace(1)* %out
+  store i32 %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @permlane16_fetch_invalid_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
+define amdgpu_kernel void @permlane16_fetch_invalid_bound_ctrl(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
 ; CHECK-LABEL: @permlane16_fetch_invalid_bound_ctrl(
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 true, i1 true)
-; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
+; CHECK-NEXT:    store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %res = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true)
-  store i32 %res, i32 addrspace(1)* %out
+  store i32 %res, ptr addrspace(1) %out
   ret void
 }

@@ -2767,36 +2767,36 @@ define amdgpu_kernel void @permlane16_fetch_invalid_bound_ctrl(i32 addrspace(1)*

 declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1 immarg, i1 immarg)

-define amdgpu_kernel void @permlanex16(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
+define amdgpu_kernel void @permlanex16(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
 ; CHECK-LABEL: @permlanex16(
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 false)
-; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
+; CHECK-NEXT:    store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %res = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
-  store i32 %res, i32 addrspace(1)* %out
+  store i32 %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @permlanex16_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
+define amdgpu_kernel void @permlanex16_bound_ctrl(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
 ; CHECK-LABEL: @permlanex16_bound_ctrl(
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 false, i1 true)
-; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
+; CHECK-NEXT:    store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %res = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true)
-  store i32 %res, i32 addrspace(1)* %out
+  store i32 %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @permlanex16_fetch_invalid_bound_ctrl(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) {
+define amdgpu_kernel void @permlanex16_fetch_invalid_bound_ctrl(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
 ; CHECK-LABEL: @permlanex16_fetch_invalid_bound_ctrl(
 ; CHECK-NEXT:    [[RES:%.*]] = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 [[SRC0:%.*]], i32 [[SRC1:%.*]], i32 [[SRC2:%.*]], i1 true, i1 true)
-; CHECK-NEXT:    store i32 [[RES]], i32 addrspace(1)* [[OUT:%.*]], align 4
+; CHECK-NEXT:    store i32 [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %res = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true)
-  store i32 %res, i32 addrspace(1)* %out
+  store i32 %res, ptr addrspace(1) %out
   ret void
 }

@@ -2859,309 +2859,309 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, floa
 declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
 declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

-define amdgpu_kernel void @image_sample_a16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
+define amdgpu_kernel void @image_sample_a16_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
 ; CHECK-LABEL: @image_sample_a16_1d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
+define amdgpu_kernel void @image_sample_a16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_2d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
+define amdgpu_kernel void @image_sample_a16_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
 ; CHECK-LABEL: @image_sample_a16_3d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %r32 = fpext half %r to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
+define amdgpu_kernel void @image_sample_a16_cube(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
 ;
 ; CHECK-LABEL: @image_sample_a16_cube(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %face32 = fpext half %face to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
+define amdgpu_kernel void @image_sample_a16_1darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_1darray(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %slice32 = fpext half %slice to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
+define amdgpu_kernel void @image_sample_a16_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_2darray(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %slice32 = fpext half %slice to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_c_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
+define amdgpu_kernel void @image_sample_a16_c_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
 ; CHECK-LABEL: @image_sample_a16_c_1d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_c_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
+define amdgpu_kernel void @image_sample_a16_c_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_c_2d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
+define amdgpu_kernel void @image_sample_a16_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_cl_1d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
+define amdgpu_kernel void @image_sample_a16_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_cl_2d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_c_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
+define amdgpu_kernel void @image_sample_a16_c_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_cl_1d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_c_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
+define amdgpu_kernel void @image_sample_a16_c_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_cl_2d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_b16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s) {
+define amdgpu_kernel void @image_sample_a16_b16_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s) {
 ; CHECK-LABEL: @image_sample_a16_b16_1d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %bias32 = fpext half %bias to float
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_b32_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
+define amdgpu_kernel void @image_sample_a16_b32_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
 ; CHECK-LABEL: @image_sample_a16_b32_1d(
 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_b16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) {
+define amdgpu_kernel void @image_sample_a16_b16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_b16_2d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %bias32 = fpext half %bias to float
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_b32_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
+define amdgpu_kernel void @image_sample_a16_b32_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_b32_2d(
 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
 ; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_c_b16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s) {
+define amdgpu_kernel void @image_sample_a16_c_b16_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s) {
 ; CHECK-LABEL: @image_sample_a16_c_b16_1d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %bias32 = fpext half %bias to float
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_c_b32_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
+define amdgpu_kernel void @image_sample_a16_c_b32_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
 ; CHECK-LABEL: @image_sample_a16_c_b32_1d(
 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_c_b16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) {
+define amdgpu_kernel void @image_sample_a16_c_b16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_c_b16_2d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %bias32 = fpext half %bias to float
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_c_b32_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
+define amdgpu_kernel void @image_sample_a16_c_b32_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_c_b32_2d(
 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
 ; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_b16_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %clamp) {
+define amdgpu_kernel void @image_sample_a16_b16_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_b16_cl_1d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %bias32 = fpext half %bias to float
   %s32 = fpext half %s to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_b32_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
+define amdgpu_kernel void @image_sample_a16_b32_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_b32_cl_1d(
 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
 ; CHECK-NEXT:    [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_b16_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) {
+define amdgpu_kernel void @image_sample_a16_b16_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_b16_cl_2d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %bias32 = fpext half %bias to float
@@ -3169,60 +3169,60 @@ define amdgpu_kernel void @image_sample_a16_b16_cl_2d(<4 x float> addrspace(1)*
   %t32 = fpext half %t to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_b32_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
+define amdgpu_kernel void @image_sample_a16_b32_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_b32_cl_2d(
 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
 ; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
 ; CHECK-NEXT:    [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[T32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %t32 = fpext half %t to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_c_b16_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %clamp) {
+define amdgpu_kernel void @image_sample_a16_c_b16_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_b16_cl_1d(
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %bias32 = fpext half %bias to float
   %s32 = fpext half %s to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_c_b32_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
+define amdgpu_kernel void @image_sample_a16_c_b32_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_b32_cl_1d(
 ; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
 ; CHECK-NEXT:    [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float
 ; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
   %clamp32 = fpext half %clamp to float
   %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @image_sample_a16_c_b16_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4
x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) { +define amdgpu_kernel void @image_sample_a16_c_b16_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_b16_cl_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %bias32 = fpext half %bias to float @@ -3230,45 +3230,45 @@ define amdgpu_kernel void @image_sample_a16_c_b16_cl_2d(<4 x float> addrspace(1) %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_b32_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) { +define amdgpu_kernel void @image_sample_a16_c_b32_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_b32_cl_2d( ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float ; CHECK-NEXT: [[T32:%.*]] = fpext half [[T:%.*]] to float ; CHECK-NEXT: [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[T32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { +define amdgpu_kernel void @image_sample_a16_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { ; CHECK-LABEL: @image_sample_a16_d_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) 
[[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %s32 = fpext half %s to float %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { +define amdgpu_kernel void @image_sample_a16_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_d_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3278,14 +3278,14 @@ define amdgpu_kernel void @image_sample_a16_d_2d(<4 x float> addrspace(1)* %out, %s32 = fpext half %s to float %t32 = fpext half %t to float %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_d_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) { +define amdgpu_kernel void @image_sample_a16_d_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) { ; CHECK-LABEL: @image_sample_a16_d_3d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3298,28 +3298,28 @@ define amdgpu_kernel void @image_sample_a16_d_3d(<4 x float> addrspace(1)* %out, %t32 = fpext half %t to float %r32 = fpext half %r to float %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> 
inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { +define amdgpu_kernel void @image_sample_a16_c_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { ; CHECK-LABEL: @image_sample_a16_c_d_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %s32 = fpext half %s to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { +define amdgpu_kernel void @image_sample_a16_c_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_c_d_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3329,14 +3329,14 @@ define amdgpu_kernel void @image_sample_a16_c_d_2d(<4 x float> addrspace(1)* %ou %s32 = fpext half %s to float %t32 = fpext half %t to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { +define amdgpu_kernel void @image_sample_a16_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_d_cl_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3344,14 
+3344,14 @@ define amdgpu_kernel void @image_sample_a16_d_cl_1d(<4 x float> addrspace(1)* %o %s32 = fpext half %s to float %clamp32 = fpext half %clamp to float %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { +define amdgpu_kernel void @image_sample_a16_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_d_cl_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3362,14 +3362,14 @@ define amdgpu_kernel void @image_sample_a16_d_cl_2d(<4 x float> addrspace(1)* %o %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) { +define amdgpu_kernel void @image_sample_a16_c_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_d_cl_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3377,14 +3377,14 @@ define amdgpu_kernel void @image_sample_a16_c_d_cl_1d(<4 x float> addrspace(1)* %s32 = fpext half %s to float %clamp32 = fpext half %clamp to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, 
float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { +define amdgpu_kernel void @image_sample_a16_c_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_d_cl_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3395,28 +3395,28 @@ define amdgpu_kernel void @image_sample_a16_c_d_cl_2d(<4 x float> addrspace(1)* %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { +define amdgpu_kernel void @image_sample_a16_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { ; CHECK-LABEL: @image_sample_a16_cd_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %s32 = fpext half %s to float %res = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { +define amdgpu_kernel void @image_sample_a16_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_cd_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext 
half %dsdh to float @@ -3426,28 +3426,28 @@ define amdgpu_kernel void @image_sample_a16_cd_2d(<4 x float> addrspace(1)* %out %s32 = fpext half %s to float %t32 = fpext half %t to float %res = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { +define amdgpu_kernel void @image_sample_a16_c_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) { ; CHECK-LABEL: @image_sample_a16_c_cd_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %s32 = fpext half %s to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { +define amdgpu_kernel void @image_sample_a16_c_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_c_cd_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3457,14 +3457,14 @@ define amdgpu_kernel void @image_sample_a16_c_cd_2d(<4 x float> addrspace(1)* %o %s32 = fpext half %s to float %t32 = fpext half %t to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { +define amdgpu_kernel void @image_sample_a16_cd_cl_1d(ptr addrspace(1) %out, <8 x 
i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_cd_cl_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3472,14 +3472,14 @@ define amdgpu_kernel void @image_sample_a16_cd_cl_1d(<4 x float> addrspace(1)* % %s32 = fpext half %s to float %clamp32 = fpext half %clamp to float %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { +define amdgpu_kernel void @image_sample_a16_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_cd_cl_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3490,14 +3490,14 @@ define amdgpu_kernel void @image_sample_a16_cd_cl_2d(<4 x float> addrspace(1)* % %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) { +define amdgpu_kernel void @image_sample_a16_c_cd_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_cd_cl_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ 
-3505,14 +3505,14 @@ define amdgpu_kernel void @image_sample_a16_c_cd_cl_1d(<4 x float> addrspace(1)* %s32 = fpext half %s to float %clamp32 = fpext half %clamp to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { +define amdgpu_kernel void @image_sample_a16_c_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) { ; CHECK-LABEL: @image_sample_a16_c_cd_cl_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3523,118 +3523,118 @@ define amdgpu_kernel void @image_sample_a16_c_cd_cl_2d(<4 x float> addrspace(1)* %t32 = fpext half %t to float %clamp32 = fpext half %clamp to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) { +define amdgpu_kernel void @image_sample_a16_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) { ; CHECK-LABEL: @image_sample_a16_l_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %lod32 = fpext half %lod to float %res = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) { +define amdgpu_kernel void @image_sample_a16_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) { ; CHECK-LABEL: @image_sample_a16_l_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> 
@llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %lod32 = fpext half %lod to float %res = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) { +define amdgpu_kernel void @image_sample_a16_c_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) { ; CHECK-LABEL: @image_sample_a16_c_l_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %lod32 = fpext half %lod to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) { +define amdgpu_kernel void @image_sample_a16_c_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) { ; CHECK-LABEL: @image_sample_a16_c_l_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %lod32 = fpext half %lod to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, float %lod32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_lz_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { +define amdgpu_kernel void @image_sample_a16_lz_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @image_sample_a16_lz_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 
x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %res = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_lz_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { +define amdgpu_kernel void @image_sample_a16_lz_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_lz_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %res = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_lz_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) { +define amdgpu_kernel void @image_sample_a16_c_lz_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) { ; CHECK-LABEL: @image_sample_a16_c_lz_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_lz_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) { +define amdgpu_kernel void @image_sample_a16_c_lz_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_c_lz_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %res = call <4 x float> 
@llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V1(float addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { +define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V1(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V1( ; CHECK-NEXT: [[RES:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store float [[RES]], float addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: store float [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3645,14 +3645,14 @@ define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V1(float addrspace(1)* %t32 = fpext half %t to float %slice32 = fpext half %slice to float %res = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store float %res, float addrspace(1)* %out + store float %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { +define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) { ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V2( ; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <2 x float> [[RES]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8 +; CHECK-NEXT: store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3663,14 +3663,14 @@ define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(<2 x float> addrspa %t32 = fpext half %t to float %slice32 = fpext half %slice to float %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <2 x float> %res, <2 x float> addrspace(1)* %out + store <2 x float> %res, ptr addrspace(1) 
%out ret void } -define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %slice) { +define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %slice) { ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_const( ; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half 0xH3400, half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <2 x float> [[RES]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8 +; CHECK-NEXT: store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3680,16 +3680,16 @@ define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const(<2 x float> addr %s32 = fpext half %s to float %slice32 = fpext half %slice to float %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float 0.25, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <2 x float> %res, <2 x float> addrspace(1)* %out + store <2 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const_noopt(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %slice) { +define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const_noopt(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %slice) { ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_const_noopt( ; CHECK-NEXT: [[S32:%.*]] = fpext half [[S:%.*]] to float ; CHECK-NEXT: [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float ; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S32]], float 1.000000e+10, float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <2 x float> [[RES]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8 +; CHECK-NEXT: store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3699,70 +3699,70 @@ define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const_noopt(<2 x float %s32 = fpext half %s to float %slice32 = fpext half %slice to float %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float 1.0e+10, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <2 x float> %res, <2 x float> addrspace(1)* %out + store <2 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void 
@image_load_a16_mip_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i16 %s) { +define amdgpu_kernel void @image_load_a16_mip_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) { ; CHECK-LABEL: @image_load_a16_mip_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = zext i16 %s to i32 %res = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s32, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_load_a16_mip_1d_noopt(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i16 %s) { +define amdgpu_kernel void @image_load_a16_mip_1d_noopt(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) { ; CHECK-LABEL: @image_load_a16_mip_1d_noopt( ; CHECK-NEXT: [[S32:%.*]] = sext i16 [[S:%.*]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 [[S32]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = sext i16 %s to i32 %res = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s32, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_load_a16_mip_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i16 %s, i16 %t) { +define amdgpu_kernel void @image_load_a16_mip_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s, i16 %t) { ; CHECK-LABEL: @image_load_a16_mip_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 [[S:%.*]], i16 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = zext i16 %s to i32 %t32 = zext i16 %t to i32 %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s32, i32 %t32, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_load_a16_mip_2d_const(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i16 %s) { +define amdgpu_kernel void @image_load_a16_mip_2d_const(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) { ; CHECK-LABEL: @image_load_a16_mip_2d_const( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 [[S:%.*]], i16 -1, <8 x i32> [[RSRC:%.*]], i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = zext i16 %s to i32 %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s32, i32 65535, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x 
float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_load_a16_mip_2d_const_noopt(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i16 %s) { +define amdgpu_kernel void @image_load_a16_mip_2d_const_noopt(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i16 %s) { ; CHECK-LABEL: @image_load_a16_mip_2d_const_noopt( ; CHECK-NEXT: [[S32:%.*]] = zext i16 [[S:%.*]] to i32 ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 [[S32]], i32 65536, <8 x i32> [[RSRC:%.*]], i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = zext i16 %s to i32 %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s32, i32 65536, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -3770,23 +3770,23 @@ define amdgpu_kernel void @image_load_a16_mip_2d_const_noopt(<4 x float> addrspa ; llvm.amdgcn.image.sample g16 ; -------------------------------------------------------------------- -define amdgpu_kernel void @image_sample_g16_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { +define amdgpu_kernel void @image_sample_g16_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { ; CHECK-LABEL: @image_sample_g16_d_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { +define amdgpu_kernel void @image_sample_g16_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { ; CHECK-LABEL: @image_sample_g16_d_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3794,14 +3794,14 @@ define amdgpu_kernel void @image_sample_g16_d_2d(<4 x float> addrspace(1)* %out, %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, 
float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_d_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) { +define amdgpu_kernel void @image_sample_g16_d_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) { ; CHECK-LABEL: @image_sample_g16_d_3d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3811,27 +3811,27 @@ define amdgpu_kernel void @image_sample_g16_d_3d(<4 x float> addrspace(1)* %out, %dtdv32 = fpext half %dtdv to float %drdv32 = fpext half %drdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %drdh32, float %dsdv32, float %dtdv32, float %drdv32, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_c_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { +define amdgpu_kernel void @image_sample_g16_c_d_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { ; CHECK-LABEL: @image_sample_g16_c_d_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_c_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { +define amdgpu_kernel void @image_sample_g16_c_d_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { ; CHECK-LABEL: @image_sample_g16_c_d_2d( ; 
CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3839,27 +3839,27 @@ define amdgpu_kernel void @image_sample_g16_c_d_2d(<4 x float> addrspace(1)* %ou %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { +define amdgpu_kernel void @image_sample_g16_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @image_sample_g16_d_cl_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { +define amdgpu_kernel void @image_sample_g16_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @image_sample_g16_d_cl_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3867,27 +3867,27 @@ define amdgpu_kernel void @image_sample_g16_d_cl_2d(<4 x float> addrspace(1)* %o %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float 
%dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_c_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { +define amdgpu_kernel void @image_sample_g16_c_d_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @image_sample_g16_c_d_cl_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_c_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { +define amdgpu_kernel void @image_sample_g16_c_d_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @image_sample_g16_c_d_cl_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3895,27 +3895,27 @@ define amdgpu_kernel void @image_sample_g16_c_d_cl_2d(<4 x float> addrspace(1)* %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { +define amdgpu_kernel void @image_sample_g16_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { ; CHECK-LABEL: @image_sample_g16_cd_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> 
@llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { +define amdgpu_kernel void @image_sample_g16_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { ; CHECK-LABEL: @image_sample_g16_cd_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3923,27 +3923,27 @@ define amdgpu_kernel void @image_sample_g16_cd_2d(<4 x float> addrspace(1)* %out %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_c_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { +define amdgpu_kernel void @image_sample_g16_c_cd_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { ; CHECK-LABEL: @image_sample_g16_c_cd_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_c_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float 
%zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { +define amdgpu_kernel void @image_sample_g16_c_cd_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { ; CHECK-LABEL: @image_sample_g16_c_cd_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3951,27 +3951,27 @@ define amdgpu_kernel void @image_sample_g16_c_cd_2d(<4 x float> addrspace(1)* %o %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { +define amdgpu_kernel void @image_sample_g16_cd_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @image_sample_g16_cd_cl_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { +define amdgpu_kernel void @image_sample_g16_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @image_sample_g16_cd_cl_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 
16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -3979,27 +3979,27 @@ define amdgpu_kernel void @image_sample_g16_cd_cl_2d(<4 x float> addrspace(1)* % %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_c_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { +define amdgpu_kernel void @image_sample_g16_c_cd_cl_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @image_sample_g16_c_cd_cl_1d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float %dsdv32 = fpext half %dsdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dsdv32, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_c_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { +define amdgpu_kernel void @image_sample_g16_c_cd_cl_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @image_sample_g16_c_cd_cl_2d( ; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -4007,14 +4007,14 @@ define amdgpu_kernel void @image_sample_g16_c_cd_cl_2d(<4 x float> addrspace(1)* %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float %res = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void 
@image_sample_g16_c_d_o_2darray_V1(float addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { +define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V1(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V1( ; CHECK-NEXT: [[RES:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store float [[RES]], float addrspace(1)* [[OUT:%.*]], align 4 +; CHECK-NEXT: store float [[RES]], ptr addrspace(1) [[OUT:%.*]], align 4 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -4022,14 +4022,14 @@ define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V1(float addrspace(1)* %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float %res = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store float %res, float addrspace(1)* %out + store float %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { +define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V2( ; CHECK-NEXT: [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <2 x float> [[RES]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8 +; CHECK-NEXT: store <2 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 8 ; CHECK-NEXT: ret void ; %dsdh32 = fpext half %dsdh to float @@ -4037,7 +4037,7 @@ define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(<2 x float> addrspa %dsdv32 = fpext half %dsdv to float %dtdv32 = fpext half %dtdv to float %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <2 x float> %res, <2 x float> addrspace(1)* %out + store <2 x float> %res, ptr addrspace(1) %out ret void } @@ -4045,108 +4045,108 @@ define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(<2 x float> addrspa ; llvm.amdgcn.image.sample a16 preserve fast-math flags ; -------------------------------------------------------------------- -define amdgpu_kernel void 
@image_sample_a16_1d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { +define amdgpu_kernel void @image_sample_a16_1d_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @image_sample_a16_1d_nnan( ; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %res = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_1d_nnan_ninf_nsz(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { +define amdgpu_kernel void @image_sample_a16_1d_nnan_ninf_nsz(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @image_sample_a16_1d_nnan_ninf_nsz( ; CHECK-NEXT: [[RES:%.*]] = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %res = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_1d_fast(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { +define amdgpu_kernel void @image_sample_a16_1d_fast(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @image_sample_a16_1d_fast( ; CHECK-NEXT: [[RES:%.*]] = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %res = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_2d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { +define amdgpu_kernel void @image_sample_a16_2d_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) { ; CHECK-LABEL: @image_sample_a16_2d_nnan( ; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], 
<4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %res = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_3d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) { +define amdgpu_kernel void @image_sample_a16_3d_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) { ; CHECK-LABEL: @image_sample_a16_3d_nnan( ; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %r32 = fpext half %r to float %res = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s32, float %t32, float %r32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_cube_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) { +define amdgpu_kernel void @image_sample_a16_cube_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) { ; ; CHECK-LABEL: @image_sample_a16_cube_nnan( ; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %face32 = fpext half %face to float %res = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s32, float %t32, float %face32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_1darray_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) { +define amdgpu_kernel void @image_sample_a16_1darray_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) { ; CHECK-LABEL: @image_sample_a16_1darray_nnan( ; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; 
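; The a16 fold exercised throughout this section: when every coordinate
; operand is an fpext from half, the f32 sample intrinsic (here
; @llvm.amdgcn.image.sample.1darray.v4f32.f32) is rewritten to its .f16
; variant taking the half values directly, and fast-math flags such as
; nnan carry over, as the CHECK lines above show.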
%s32 = fpext half %s to float %slice32 = fpext half %slice to float %res = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @image_sample_a16_2darray_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) { +define amdgpu_kernel void @image_sample_a16_2darray_nnan(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) { ; CHECK-LABEL: @image_sample_a16_2darray_nnan( ; CHECK-NEXT: [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; %s32 = fpext half %s to float %t32 = fpext half %t to float %slice32 = fpext half %slice to float %res = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s32, float %t32, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %res, <4 x float> addrspace(1)* %out + store <4 x float> %res, ptr addrspace(1) %out ret void } @@ -4165,172 +4165,172 @@ declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -define amdgpu_kernel void @sample_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { +define amdgpu_kernel void @sample_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { ; CHECK-LABEL: @sample_l_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @sample_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { +define amdgpu_kernel void @sample_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { ; CHECK-LABEL: @sample_l_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; 
CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float -0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @sample_c_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { +define amdgpu_kernel void @sample_c_l_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { ; CHECK-LABEL: @sample_c_l_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float -2.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @sample_c_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { +define amdgpu_kernel void @sample_c_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { ; CHECK-LABEL: @sample_c_l_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @sample_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) { +define amdgpu_kernel void @sample_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) { ; CHECK-LABEL: @sample_l_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr 
addrspace(1) %out ret void } -define amdgpu_kernel void @sample_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { +define amdgpu_kernel void @sample_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { ; CHECK-LABEL: @sample_l_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @sample_c_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) { +define amdgpu_kernel void @sample_c_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) { ; CHECK-LABEL: @sample_c_l_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @sample_c_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { +define amdgpu_kernel void @sample_c_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { ; CHECK-LABEL: @sample_c_l_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @gather4_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, 
float %lod) { +define amdgpu_kernel void @gather4_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { ; CHECK-LABEL: @gather4_l_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 15, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @gather4_c_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { +define amdgpu_kernel void @gather4_c_l_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { ; CHECK-LABEL: @gather4_c_l_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @gather4_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { +define amdgpu_kernel void @gather4_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) { ; CHECK-LABEL: @gather4_l_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @gather4_c_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { +define amdgpu_kernel void @gather4_c_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) { ; CHECK-LABEL: @gather4_c_l_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> 
@llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @gather4_c_l_o_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %slice, float %lod) { +define amdgpu_kernel void @gather4_c_l_o_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %slice, float %lod) { ; CHECK-LABEL: @gather4_c_l_o_2darray( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2darray.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %slice, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4338,86 +4338,86 @@ main_body: ; llvm.amdgcn.image.sample mipmap zero ; -------------------------------------------------------------------- -define amdgpu_kernel void @load_mip_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s) { +define amdgpu_kernel void @load_mip_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s) { ; CHECK-LABEL: @load_mip_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @load_mip_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) { +define amdgpu_kernel void @load_mip_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; CHECK-LABEL: @load_mip_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; 
CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @load_mip_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { +define amdgpu_kernel void @load_mip_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { ; CHECK-LABEL: @load_mip_3d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @load_mip_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) { +define amdgpu_kernel void @load_mip_1darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) { ; CHECK-LABEL: @load_mip_1darray( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @load_mip_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { +define amdgpu_kernel void @load_mip_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { ; CHECK-LABEL: @load_mip_2darray( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @load_mip_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { +define amdgpu_kernel void @load_mip_cube(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) { ; CHECK-LABEL: @load_mip_cube( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 
+; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @store_mip_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) { +define amdgpu_kernel void @store_mip_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) { ; CHECK-LABEL: @store_mip_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) @@ -4428,7 +4428,7 @@ main_body: ret void } -define amdgpu_kernel void @store_mip_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { +define amdgpu_kernel void @store_mip_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { ; CHECK-LABEL: @store_mip_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) @@ -4439,7 +4439,7 @@ main_body: ret void } -define amdgpu_kernel void @store_mip_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { +define amdgpu_kernel void @store_mip_3d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { ; CHECK-LABEL: @store_mip_3d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) @@ -4450,7 +4450,7 @@ main_body: ret void } -define amdgpu_kernel void @store_mip_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { +define amdgpu_kernel void @store_mip_1darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) { ; CHECK-LABEL: @store_mip_1darray( ; CHECK-NEXT: main_body: ; CHECK-NEXT: call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) @@ -4461,7 +4461,7 @@ main_body: ret void } -define amdgpu_kernel void @store_mip_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { +define amdgpu_kernel void @store_mip_2darray(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { ; CHECK-LABEL: @store_mip_2darray( ; CHECK-NEXT: main_body: ; CHECK-NEXT: call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0) @@ -4472,7 +4472,7 @@ main_body: ret void } -define amdgpu_kernel void @store_mip_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { +define amdgpu_kernel void @store_mip_cube(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) { ; CHECK-LABEL: @store_mip_cube( ; CHECK-NEXT: main_body: ; CHECK-NEXT: call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], 
i32 0, i32 0) @@ -4517,215 +4517,215 @@ declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32(i32, float, floa declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 -define amdgpu_kernel void @sample_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_kernel void @sample_b_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { ; CHECK-LABEL: @sample_b_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @sample_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { +define amdgpu_kernel void @sample_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { ; CHECK-LABEL: @sample_b_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32(i32 15, float -0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @sample_c_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { +define amdgpu_kernel void @sample_c_b_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { ; CHECK-LABEL: @sample_c_b_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32(i32 15, float -0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @sample_c_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { +define amdgpu_kernel void 
@sample_c_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { ; CHECK-LABEL: @sample_c_b_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32(i32 15, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @sample_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) { +define amdgpu_kernel void @sample_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) { ; CHECK-LABEL: @sample_b_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @sample_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { +define amdgpu_kernel void @sample_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { ; CHECK-LABEL: @sample_b_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @sample_c_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) { +define amdgpu_kernel void @sample_c_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) { ; CHECK-LABEL: @sample_c_b_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 
false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @sample_c_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { +define amdgpu_kernel void @sample_c_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { ; CHECK-LABEL: @sample_c_b_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @gather4_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { +define amdgpu_kernel void @gather4_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { ; CHECK-LABEL: @gather4_b_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32(i32 15, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @gather4_c_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { +define amdgpu_kernel void @gather4_c_b_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { ; CHECK-LABEL: @gather4_c_b_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32(i32 15, float 0.0, float 
%zcompare,float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @gather4_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { +define amdgpu_kernel void @gather4_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) { ; CHECK-LABEL: @gather4_b_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @gather4_c_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { +define amdgpu_kernel void @gather4_c_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) { ; CHECK-LABEL: @gather4_c_b_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare,float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @sample_c_b_o_a16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t) { +define amdgpu_kernel void @sample_c_b_o_a16_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t) { ; CHECK-LABEL: @sample_c_b_o_a16_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f16(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16(i32 15, i32 %offset, half 0.0, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret 
void } ; Check that bias is not optimized away if > 0 -define amdgpu_kernel void @sample_b_1d_pos(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_kernel void @sample_b_1d_pos(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { ; CHECK-LABEL: @sample_b_1d_pos( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float 1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } ; Check that bias is not optimized away if < 0 -define amdgpu_kernel void @sample_b_1d_neg(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_kernel void @sample_b_1d_neg(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { ; CHECK-LABEL: @sample_b_1d_neg( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float -1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float -1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } ; Zero bias + A16 -define amdgpu_kernel void @sample_b_1d_a16(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { +define amdgpu_kernel void @sample_b_1d_a16(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) { ; CHECK-LABEL: @sample_b_1d_a16( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %s32 = fpext half %s to float %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float -0.0, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } @@ -4733,523 +4733,523 @@ main_body: ; llvm.amdgcn.image.sample offset zero ; -------------------------------------------------------------------- -define amdgpu_kernel void @offset_sample_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_kernel void @offset_sample_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { ; CHECK-LABEL: @offset_sample_o_1d( ; CHECK-NEXT: main_body: ; 
CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { +define amdgpu_kernel void @offset_sample_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { ; CHECK-LABEL: @offset_sample_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { +define amdgpu_kernel void @offset_sample_c_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { ; CHECK-LABEL: @offset_sample_c_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { +define amdgpu_kernel void @offset_sample_c_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { ; CHECK-LABEL: @offset_sample_c_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> 
@llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) { +define amdgpu_kernel void @offset_sample_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_cl_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 0, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) { +define amdgpu_kernel void @offset_sample_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_cl_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) { +define amdgpu_kernel void @offset_sample_c_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_c_cl_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void 
@offset_sample_c_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) { +define amdgpu_kernel void @offset_sample_c_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_c_cl_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) { +define amdgpu_kernel void @offset_sample_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) { ; CHECK-LABEL: @offset_sample_b_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { +define amdgpu_kernel void @offset_sample_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { ; CHECK-LABEL: @offset_sample_b_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) { +define amdgpu_kernel void @offset_sample_c_b_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg 
%samp, float %bias, float %zcompare, float %s) { ; CHECK-LABEL: @offset_sample_c_b_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) { +define amdgpu_kernel void @offset_sample_c_b_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) { ; CHECK-LABEL: @offset_sample_c_b_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_b_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) { +define amdgpu_kernel void @offset_sample_b_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_b_cl_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_b_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) { +define amdgpu_kernel void @offset_sample_b_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_b_cl_o_2d( ; CHECK-NEXT: main_body: ; 
CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_b_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) { +define amdgpu_kernel void @offset_sample_c_b_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_c_b_cl_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_b_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) { +define amdgpu_kernel void @offset_sample_c_b_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_c_b_cl_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_d_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { +define amdgpu_kernel void @offset_sample_d_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { ; CHECK-LABEL: 
@offset_sample_d_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_d_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +define amdgpu_kernel void @offset_sample_d_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { ; CHECK-LABEL: @offset_sample_d_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_d_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { +define amdgpu_kernel void @offset_sample_c_d_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { ; CHECK-LABEL: @offset_sample_c_d_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_d_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +define amdgpu_kernel void @offset_sample_c_d_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float 
%zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { ; CHECK-LABEL: @offset_sample_c_d_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_d_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { +define amdgpu_kernel void @offset_sample_d_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_d_cl_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_d_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +define amdgpu_kernel void @offset_sample_d_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_d_cl_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void 
@offset_sample_c_d_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { +define amdgpu_kernel void @offset_sample_c_d_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_c_d_cl_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_d_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +define amdgpu_kernel void @offset_sample_c_d_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_c_d_cl_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_cd_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { +define amdgpu_kernel void @offset_sample_cd_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { ; CHECK-LABEL: @offset_sample_cd_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, 
i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_cd_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +define amdgpu_kernel void @offset_sample_cd_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { ; CHECK-LABEL: @offset_sample_cd_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_cd_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { +define amdgpu_kernel void @offset_sample_c_cd_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { ; CHECK-LABEL: @offset_sample_c_cd_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_cd_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { +define amdgpu_kernel void @offset_sample_c_cd_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { ; CHECK-LABEL: @offset_sample_c_cd_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], 
align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_cd_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { +define amdgpu_kernel void @offset_sample_cd_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_cd_cl_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_cd_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +define amdgpu_kernel void @offset_sample_cd_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_cd_cl_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_cd_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { +define amdgpu_kernel void @offset_sample_c_cd_cl_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { ; CHECK-LABEL: @offset_sample_c_cd_cl_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 
15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_cd_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { +define amdgpu_kernel void @offset_sample_c_cd_cl_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { ; CHECK-LABEL: @offset_sample_c_cd_cl_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { +define amdgpu_kernel void @offset_sample_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { ; CHECK-LABEL: @offset_sample_l_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 0, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { +define amdgpu_kernel void @offset_sample_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { ; CHECK-LABEL: @offset_sample_l_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: 
[[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { +define amdgpu_kernel void @offset_sample_c_l_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { ; CHECK-LABEL: @offset_sample_c_l_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { +define amdgpu_kernel void @offset_sample_c_l_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { ; CHECK-LABEL: @offset_sample_c_l_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_lz_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { +define amdgpu_kernel void @offset_sample_lz_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { ; CHECK-LABEL: @offset_sample_lz_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], 
align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_lz_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { +define amdgpu_kernel void @offset_sample_lz_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { ; CHECK-LABEL: @offset_sample_lz_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_lz_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { +define amdgpu_kernel void @offset_sample_c_lz_o_1d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { ; CHECK-LABEL: @offset_sample_c_lz_o_1d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @offset_sample_c_lz_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { +define amdgpu_kernel void @offset_sample_c_lz_o_2d(ptr addrspace(1) %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { ; CHECK-LABEL: @offset_sample_c_lz_o_2d( ; CHECK-NEXT: main_body: ; CHECK-NEXT: [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0) -; CHECK-NEXT: store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16 +; CHECK-NEXT: store <4 x float> [[V]], ptr addrspace(1) [[OUT:%.*]], align 16 ; CHECK-NEXT: ret void ; main_body: %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) - store <4 x float> %v, <4 x float> addrspace(1)* %out + store <4 x float> %v, ptr 
addrspace(1) %out ret void } @@ -5298,13 +5298,13 @@ declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32, i32, floa ; llvm.amdgcn.is.shared ; -------------------------------------------------------------------- -declare i1 @llvm.amdgcn.is.shared(i8*) nounwind readnone +declare i1 @llvm.amdgcn.is.shared(ptr) nounwind readnone define i1 @test_is_shared_null() nounwind { ; CHECK-LABEL: @test_is_shared_null( ; CHECK-NEXT: ret i1 false ; - %val = call i1 @llvm.amdgcn.is.shared(i8* null) + %val = call i1 @llvm.amdgcn.is.shared(ptr null) ret i1 %val } @@ -5312,7 +5312,7 @@ define i1 @test_is_shared_undef() nounwind { ; CHECK-LABEL: @test_is_shared_undef( ; CHECK-NEXT: ret i1 undef ; - %val = call i1 @llvm.amdgcn.is.shared(i8* undef) + %val = call i1 @llvm.amdgcn.is.shared(ptr undef) ret i1 %val } @@ -5320,13 +5320,13 @@ define i1 @test_is_shared_undef() nounwind { ; llvm.amdgcn.is.private ; -------------------------------------------------------------------- -declare i1 @llvm.amdgcn.is.private(i8*) nounwind readnone +declare i1 @llvm.amdgcn.is.private(ptr) nounwind readnone define i1 @test_is_private_null() nounwind { ; CHECK-LABEL: @test_is_private_null( ; CHECK-NEXT: ret i1 false ; - %val = call i1 @llvm.amdgcn.is.private(i8* null) + %val = call i1 @llvm.amdgcn.is.private(ptr null) ret i1 %val } @@ -5334,6 +5334,6 @@ define i1 @test_is_private_undef() nounwind { ; CHECK-LABEL: @test_is_private_undef( ; CHECK-NEXT: ret i1 undef ; - %val = call i1 @llvm.amdgcn.is.private(i8* undef) + %val = call i1 @llvm.amdgcn.is.private(ptr undef) ret i1 %val } diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/ldexp.ll b/llvm/test/Transforms/InstCombine/AMDGPU/ldexp.ll index 1e97cfc45013e..c4b82193a1a10 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/ldexp.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/ldexp.ll @@ -13,61 +13,61 @@ define float @ldexp_f32_undef_undef() { ; constant or not. define void @ldexp_f32_exp0(float %x) { ; CHECK-LABEL: @ldexp_f32_exp0( -; CHECK-NEXT: store volatile float [[X:%.*]], float addrspace(1)* undef, align 4 -; CHECK-NEXT: store volatile float [[X]], float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float [[X:%.*]], ptr addrspace(1) undef, align 4 +; CHECK-NEXT: store volatile float [[X]], ptr addrspace(1) undef, align 4 ; CHECK-NEXT: [[ONE:%.*]] = call float @llvm.amdgcn.ldexp.f32(float [[X]], i32 1) -; CHECK-NEXT: store volatile float [[ONE]], float addrspace(1)* undef, align 4 +; CHECK-NEXT: store volatile float [[ONE]], ptr addrspace(1) undef, align 4 ; CHECK-NEXT: ret void ; %zero = call float @llvm.amdgcn.ldexp.f32(float %x, i32 0) - store volatile float %zero, float addrspace(1)* undef + store volatile float %zero, ptr addrspace(1) undef %undef = call float @llvm.amdgcn.ldexp.f32(float %x, i32 undef) - store volatile float %undef, float addrspace(1)* undef + store volatile float %undef, ptr addrspace(1) undef %one = call float @llvm.amdgcn.ldexp.f32(float %x, i32 1) - store volatile float %one, float addrspace(1)* undef + store volatile float %one, ptr addrspace(1) undef ret void } ; Test variable exponent but zero or undef value. 
 define void @ldexp_f32_val0(i32 %y) {
 ; CHECK-LABEL: @ldexp_f32_val0(
-; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float -0.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0x7FF8000000000000, float addrspace(1)* undef, align 4
+; CHECK-NEXT: store volatile float 0.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float -0.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0x7FF8000000000000, ptr addrspace(1) undef, align 4
 ; CHECK-NEXT: ret void
 ;
   %zero = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 %y)
-  store volatile float %zero, float addrspace(1)* undef
+  store volatile float %zero, ptr addrspace(1) undef
   %neg.zero = call float @llvm.amdgcn.ldexp.f32(float -0.0, i32 %y)
-  store volatile float %neg.zero, float addrspace(1)* undef
+  store volatile float %neg.zero, ptr addrspace(1) undef
   %undef = call float @llvm.amdgcn.ldexp.f32(float undef, i32 %y)
-  store volatile float %undef, float addrspace(1)* undef
+  store volatile float %undef, ptr addrspace(1) undef
   ret void
 }

 define void @ldexp_f32_val_infinity(i32 %y) {
 ; CHECK-LABEL: @ldexp_f32_val_infinity(
-; CHECK-NEXT: store volatile float 0x7FF0000000000000, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0xFFF0000000000000, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0x7FF0000000000000, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0xFFF0000000000000, float addrspace(1)* undef, align 4
+; CHECK-NEXT: store volatile float 0x7FF0000000000000, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0xFFF0000000000000, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0x7FF0000000000000, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0xFFF0000000000000, ptr addrspace(1) undef, align 4
 ; CHECK-NEXT: ret void
 ;
   %inf = call float @llvm.amdgcn.ldexp.f32(float 0x7ff0000000000000, i32 %y)
-  store volatile float %inf, float addrspace(1)* undef
+  store volatile float %inf, ptr addrspace(1) undef
   %neg.inf = call float @llvm.amdgcn.ldexp.f32(float 0xfff0000000000000, i32 %y)
-  store volatile float %neg.inf, float addrspace(1)* undef
+  store volatile float %neg.inf, ptr addrspace(1) undef
   %inf.zero = call float @llvm.amdgcn.ldexp.f32(float 0x7ff0000000000000, i32 0)
-  store volatile float %inf.zero, float addrspace(1)* undef
+  store volatile float %inf.zero, ptr addrspace(1) undef
   %neg.inf.zero = call float @llvm.amdgcn.ldexp.f32(float 0xfff0000000000000, i32 0)
-  store volatile float %neg.inf.zero, float addrspace(1)* undef
+  store volatile float %neg.inf.zero, ptr addrspace(1) undef
   ret void
 }

@@ -76,23 +76,23 @@ define void @ldexp_f32_val_infinity(i32 %y) {
 ; Technically this depends on the ieee_mode in the mode register.
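(For context while reading these hunks: llvm.amdgcn.ldexp.* computes x * 2^n, which is why the constant-input tests above and below fold to plain stores; the typed-to-opaque pointer rewrite never changes a folded value. A minimal standalone sketch of the fold — assuming an opt build with the AMDGPU target; the function name @fold_ldexp is illustrative, not from this patch:

declare float @llvm.amdgcn.ldexp.f32(float, i32)

define float @fold_ldexp() {
  ; instcombine constant-folds this call: 1.0 * 2^3 = 8.000000e+00
  %r = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 3)
  ret float %r
}
)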
 define void @ldexp_f32_val_nan(i32 %y) {
 ; CHECK-LABEL: @ldexp_f32_val_nan(
-; CHECK-NEXT: store volatile float 0x7FF8001000000000, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0xFFF8000100000000, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0x7FF8000020000000, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0xFFFFFFFFE0000000, float addrspace(1)* undef, align 4
+; CHECK-NEXT: store volatile float 0x7FF8001000000000, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0xFFF8000100000000, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0x7FF8000020000000, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0xFFFFFFFFE0000000, ptr addrspace(1) undef, align 4
 ; CHECK-NEXT: ret void
 ;
   %plus.qnan = call float @llvm.amdgcn.ldexp.f32(float 0x7ff0001000000000, i32 %y)
-  store volatile float %plus.qnan, float addrspace(1)* undef
+  store volatile float %plus.qnan, ptr addrspace(1) undef
   %neg.qnan = call float @llvm.amdgcn.ldexp.f32(float 0xfff0000100000000, i32 %y)
-  store volatile float %neg.qnan, float addrspace(1)* undef
+  store volatile float %neg.qnan, ptr addrspace(1) undef
   %plus.snan = call float @llvm.amdgcn.ldexp.f32(float 0x7FF0000020000000, i32 %y)
-  store volatile float %plus.snan, float addrspace(1)* undef
+  store volatile float %plus.snan, ptr addrspace(1) undef
   %neg.snan = call float @llvm.amdgcn.ldexp.f32(float 0xFFF7FFFFE0000000, i32 %y)
-  store volatile float %neg.snan, float addrspace(1)* undef
+  store volatile float %neg.snan, ptr addrspace(1) undef
   ret void
 }

@@ -100,65 +100,65 @@ define void @ldexp_f32_val_nan(i32 %y) {
 define void @ldexp_f32_val_nan_strictfp(i32 %y) #0 {
 ; CHECK-LABEL: @ldexp_f32_val_nan_strictfp(
 ; CHECK-NEXT: [[PLUS_QNAN:%.*]] = call float @llvm.amdgcn.ldexp.f32(float 0x7FF0001000000000, i32 [[Y:%.*]]) [[ATTR0:#.*]]
-; CHECK-NEXT: store volatile float [[PLUS_QNAN]], float addrspace(1)* undef, align 4
+; CHECK-NEXT: store volatile float [[PLUS_QNAN]], ptr addrspace(1) undef, align 4
 ; CHECK-NEXT: [[NEG_QNAN:%.*]] = call float @llvm.amdgcn.ldexp.f32(float 0xFFF0000100000000, i32 [[Y]]) [[ATTR0]]
-; CHECK-NEXT: store volatile float [[NEG_QNAN]], float addrspace(1)* undef, align 4
+; CHECK-NEXT: store volatile float [[NEG_QNAN]], ptr addrspace(1) undef, align 4
 ; CHECK-NEXT: [[PLUS_SNAN:%.*]] = call float @llvm.amdgcn.ldexp.f32(float 0x7FF0000020000000, i32 [[Y]]) [[ATTR0]]
-; CHECK-NEXT: store volatile float [[PLUS_SNAN]], float addrspace(1)* undef, align 4
+; CHECK-NEXT: store volatile float [[PLUS_SNAN]], ptr addrspace(1) undef, align 4
 ; CHECK-NEXT: [[NEG_SNAN:%.*]] = call float @llvm.amdgcn.ldexp.f32(float 0xFFF7FFFFE0000000, i32 [[Y]]) [[ATTR0]]
-; CHECK-NEXT: store volatile float [[NEG_SNAN]], float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0x7FF8000000000000, float addrspace(1)* undef, align 4
+; CHECK-NEXT: store volatile float [[NEG_SNAN]], ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0x7FF8000000000000, ptr addrspace(1) undef, align 4
 ; CHECK-NEXT: ret void
 ;
   %plus.qnan = call float @llvm.amdgcn.ldexp.f32(float 0x7ff0001000000000, i32 %y) #0
-  store volatile float %plus.qnan, float addrspace(1)* undef
+  store volatile float %plus.qnan, ptr addrspace(1) undef
   %neg.qnan = call float @llvm.amdgcn.ldexp.f32(float 0xfff0000100000000, i32 %y) #0
-  store volatile float %neg.qnan, float addrspace(1)* undef
+  store volatile float %neg.qnan, ptr addrspace(1) undef
   %plus.snan = call float @llvm.amdgcn.ldexp.f32(float 0x7FF0000020000000, i32 %y) #0
-  store volatile float %plus.snan, float addrspace(1)* undef
+  store volatile float %plus.snan, ptr addrspace(1) undef
   %neg.snan = call float @llvm.amdgcn.ldexp.f32(float 0xFFF7FFFFE0000000, i32 %y) #0
-  store volatile float %neg.snan, float addrspace(1)* undef
+  store volatile float %neg.snan, ptr addrspace(1) undef
   %undef = call float @llvm.amdgcn.ldexp.f32(float undef, i32 %y) #0
-  store volatile float %undef, float addrspace(1)* undef
+  store volatile float %undef, ptr addrspace(1) undef
   ret void
 }

 define void @ldexp_f32_0() {
 ; CHECK-LABEL: @ldexp_f32_0(
-; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float -0.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4
+; CHECK-NEXT: store volatile float 0.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float -0.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0.000000e+00, ptr addrspace(1) undef, align 4
 ; CHECK-NEXT: ret void
 ;
   %zero = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 0)
-  store volatile float %zero, float addrspace(1)* undef
+  store volatile float %zero, ptr addrspace(1) undef
   %neg.zero = call float @llvm.amdgcn.ldexp.f32(float -0.0, i32 0)
-  store volatile float %neg.zero, float addrspace(1)* undef
+  store volatile float %neg.zero, ptr addrspace(1) undef
   %one = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 1)
-  store volatile float %one, float addrspace(1)* undef
+  store volatile float %one, ptr addrspace(1) undef
   %min.exp = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 -126)
-  store volatile float %min.exp, float addrspace(1)* undef
+  store volatile float %min.exp, ptr addrspace(1) undef
   %min.exp.sub1 = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 -127)
-  store volatile float %min.exp.sub1, float addrspace(1)* undef
+  store volatile float %min.exp.sub1, ptr addrspace(1) undef
   %max.exp = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 127)
-  store volatile float %max.exp, float addrspace(1)* undef
+  store volatile float %max.exp, ptr addrspace(1) undef
   %max.exp.plus1 = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 128)
-  store volatile float %max.exp.plus1, float addrspace(1)* undef
+  store volatile float %max.exp.plus1, ptr addrspace(1) undef
   ret void
 }

@@ -166,114 +166,114 @@ define void @ldexp_f32_0() {
 ; Should be able to ignore strictfp in this case
 define void @ldexp_f32_0_strictfp(float %x) #0 {
 ; CHECK-LABEL: @ldexp_f32_0_strictfp(
-; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float -0.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0.000000e+00, float addrspace(1)* undef, align 4
+; CHECK-NEXT: store volatile float 0.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float -0.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0.000000e+00, ptr addrspace(1) undef, align 4
 ; CHECK-NEXT: [[UNKNOWN_ZERO:%.*]] = call float @llvm.amdgcn.ldexp.f32(float [[X:%.*]], i32 0) [[ATTR0]]
-; CHECK-NEXT: store volatile float [[UNKNOWN_ZERO]], float addrspace(1)* undef, align 4
+; CHECK-NEXT: store volatile float [[UNKNOWN_ZERO]], ptr addrspace(1) undef, align 4
 ; CHECK-NEXT: [[UNKNOWN_UNDEF:%.*]] = call float @llvm.amdgcn.ldexp.f32(float [[X]], i32 undef) [[ATTR0]]
-; CHECK-NEXT: store volatile float [[UNKNOWN_UNDEF]], float addrspace(1)* undef, align 4
+; CHECK-NEXT: store volatile float [[UNKNOWN_UNDEF]], ptr addrspace(1) undef, align 4
 ; CHECK-NEXT: [[DENORMAL_0:%.*]] = call float @llvm.amdgcn.ldexp.f32(float 0x380FFFFFC0000000, i32 0) [[ATTR0]]
-; CHECK-NEXT: store volatile float [[DENORMAL_0]], float addrspace(1)* undef, align 4
+; CHECK-NEXT: store volatile float [[DENORMAL_0]], ptr addrspace(1) undef, align 4
 ; CHECK-NEXT: [[DENORMAL_1:%.*]] = call float @llvm.amdgcn.ldexp.f32(float 0x380FFFFFC0000000, i32 1) [[ATTR0]]
-; CHECK-NEXT: store volatile float [[DENORMAL_1]], float addrspace(1)* undef, align 4
+; CHECK-NEXT: store volatile float [[DENORMAL_1]], ptr addrspace(1) undef, align 4
 ; CHECK-NEXT: ret void
 ;
   %zero = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 0) #0
-  store volatile float %zero, float addrspace(1)* undef
+  store volatile float %zero, ptr addrspace(1) undef
   %neg.zero = call float @llvm.amdgcn.ldexp.f32(float -0.0, i32 0) #0
-  store volatile float %neg.zero, float addrspace(1)* undef
+  store volatile float %neg.zero, ptr addrspace(1) undef
   %one = call float @llvm.amdgcn.ldexp.f32(float 0.0, i32 1) #0
-  store volatile float %one, float addrspace(1)* undef
+  store volatile float %one, ptr addrspace(1) undef
   %unknown.zero = call float @llvm.amdgcn.ldexp.f32(float %x, i32 0) #0
-  store volatile float %unknown.zero, float addrspace(1)* undef
+  store volatile float %unknown.zero, ptr addrspace(1) undef
   %unknown.undef = call float @llvm.amdgcn.ldexp.f32(float %x, i32 undef) #0
-  store volatile float %unknown.undef, float addrspace(1)* undef
+  store volatile float %unknown.undef, ptr addrspace(1) undef
   %denormal.0 = call float @llvm.amdgcn.ldexp.f32(float 0x380FFFFFC0000000, i32 0) #0
-  store volatile float %denormal.0, float addrspace(1)* undef
+  store volatile float %denormal.0, ptr addrspace(1) undef
   %denormal.1 = call float @llvm.amdgcn.ldexp.f32(float 0x380FFFFFC0000000, i32 1) #0
-  store volatile float %denormal.1, float addrspace(1)* undef
+  store volatile float %denormal.1, ptr addrspace(1) undef
   ret void
 }

 define void @ldexp_f32() {
 ; CHECK-LABEL: @ldexp_f32(
-; CHECK-NEXT: store volatile float 2.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 4.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 8.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 5.000000e-01, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0x3810000000000000, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0x3800000000000000, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0x47E0000000000000, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0x7FF0000000000000, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float -2.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float -4.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float -8.000000e+00, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float -5.000000e-01, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0xB810000000000000, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0xB800000000000000, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0xC7E0000000000000, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0xFFF0000000000000, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0x44D5000000000000, float addrspace(1)* undef, align 4
+; CHECK-NEXT: store volatile float 2.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 4.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 8.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 5.000000e-01, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0x3810000000000000, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0x3800000000000000, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0x47E0000000000000, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0x7FF0000000000000, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float -2.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float -4.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float -8.000000e+00, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float -5.000000e-01, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0xB810000000000000, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0xB800000000000000, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0xC7E0000000000000, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0xFFF0000000000000, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0x44D5000000000000, ptr addrspace(1) undef, align 4
 ; CHECK-NEXT: ret void
 ;
   %one.one = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 1)
-  store volatile float %one.one, float addrspace(1)* undef
+  store volatile float %one.one, ptr addrspace(1) undef
   %one.two = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 2)
-  store volatile float %one.two, float addrspace(1)* undef
+  store volatile float %one.two, ptr addrspace(1) undef
   %one.three = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 3)
-  store volatile float %one.three, float addrspace(1)* undef
+  store volatile float %one.three, ptr addrspace(1) undef
   %one.negone = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 -1)
-  store volatile float %one.negone, float addrspace(1)* undef
+  store volatile float %one.negone, ptr addrspace(1) undef
   %one.min.exp = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 -126)
-  store volatile float %one.min.exp, float addrspace(1)* undef
+  store volatile float %one.min.exp, ptr addrspace(1) undef
   %one.min.exp.sub1 = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 -127)
-  store volatile float %one.min.exp.sub1, float addrspace(1)* undef
+  store volatile float %one.min.exp.sub1, ptr addrspace(1) undef
   %one.max.exp = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 127)
-  store volatile float %one.max.exp, float addrspace(1)* undef
+  store volatile float %one.max.exp, ptr addrspace(1) undef
   %one.max.exp.plus1 = call float @llvm.amdgcn.ldexp.f32(float 1.0, i32 128)
-  store volatile float %one.max.exp.plus1, float addrspace(1)* undef
+  store volatile float %one.max.exp.plus1, ptr addrspace(1) undef
   %neg.one.one = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 1)
-  store volatile float %neg.one.one, float addrspace(1)* undef
+  store volatile float %neg.one.one, ptr addrspace(1) undef
   %neg.one.two = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 2)
-  store volatile float %neg.one.two, float addrspace(1)* undef
+  store volatile float %neg.one.two, ptr addrspace(1) undef
   %neg.one.three = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 3)
-  store volatile float %neg.one.three, float addrspace(1)* undef
+  store volatile float %neg.one.three, ptr addrspace(1) undef
   %neg.one.negone = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 -1)
-  store volatile float %neg.one.negone, float addrspace(1)* undef
+  store volatile float %neg.one.negone, ptr addrspace(1) undef
   %neg.one.min.exp = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 -126)
-  store volatile float %neg.one.min.exp, float addrspace(1)* undef
+  store volatile float %neg.one.min.exp, ptr addrspace(1) undef
   %neg.one.min.exp.sub1 = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 -127)
-  store volatile float %neg.one.min.exp.sub1, float addrspace(1)* undef
+  store volatile float %neg.one.min.exp.sub1, ptr addrspace(1) undef
   %neg.one.max.exp = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 127)
-  store volatile float %neg.one.max.exp, float addrspace(1)* undef
+  store volatile float %neg.one.max.exp, ptr addrspace(1) undef
   %neg.one.max.exp.plus1 = call float @llvm.amdgcn.ldexp.f32(float -1.0, i32 128)
-  store volatile float %neg.one.max.exp.plus1, float addrspace(1)* undef
+  store volatile float %neg.one.max.exp.plus1, ptr addrspace(1) undef
   %fortytwo.seven = call float @llvm.amdgcn.ldexp.f32(float 42.0, i32 73)
-  store volatile float %fortytwo.seven, float addrspace(1)* undef
+  store volatile float %fortytwo.seven, ptr addrspace(1) undef
   ret void
 }

@@ -283,53 +283,53 @@ define void @ldexp_f32() {
 ; considers this.
 define void @ldexp_f32_denormal() {
 ; CHECK-LABEL: @ldexp_f32_denormal(
-; CHECK-NEXT: store volatile float 0x380FFFFFC0000000, float addrspace(1)* undef, align 4
-; CHECK-NEXT: store volatile float 0x381FFFFFC0000000, float addrspace(1)* undef, align 4
+; CHECK-NEXT: store volatile float 0x380FFFFFC0000000, ptr addrspace(1) undef, align 4
+; CHECK-NEXT: store volatile float 0x381FFFFFC0000000, ptr addrspace(1) undef, align 4
 ; CHECK-NEXT: ret void
 ;
   %denormal.0 = call float @llvm.amdgcn.ldexp.f32(float 0x380FFFFFC0000000, i32 0)
-  store volatile float %denormal.0, float addrspace(1)* undef
+  store volatile float %denormal.0, ptr addrspace(1) undef
   %denormal.1 = call float @llvm.amdgcn.ldexp.f32(float 0x380FFFFFC0000000, i32 1)
-  store volatile float %denormal.1, float addrspace(1)* undef
+  store volatile float %denormal.1, ptr addrspace(1) undef
   ret void
 }

 define void @ldexp_f64() {
 ; CHECK-LABEL: @ldexp_f64(
-; CHECK-NEXT: store volatile double 2.000000e+00, double addrspace(1)* undef, align 8
-; CHECK-NEXT: store volatile double 4.000000e+00, double addrspace(1)* undef, align 8
-; CHECK-NEXT: store volatile double 0x44D5000000000000, double addrspace(1)* undef, align 8
+; CHECK-NEXT: store volatile double 2.000000e+00, ptr addrspace(1) undef, align 8
+; CHECK-NEXT: store volatile double 4.000000e+00, ptr addrspace(1) undef, align 8
+; CHECK-NEXT: store volatile double 0x44D5000000000000, ptr addrspace(1) undef, align 8
 ; CHECK-NEXT: ret void
 ;
   %one.one = call double @llvm.amdgcn.ldexp.f64(double 1.0, i32 1)
-  store volatile double %one.one, double addrspace(1)* undef
+  store volatile double %one.one, ptr addrspace(1) undef
   %one.two = call double @llvm.amdgcn.ldexp.f64(double 1.0, i32 2)
-  store volatile double %one.two, double addrspace(1)* undef
+  store volatile double %one.two, ptr addrspace(1) undef
   %fortytwo.seven = call double @llvm.amdgcn.ldexp.f64(double 42.0, i32 73)
-  store volatile double %fortytwo.seven, double addrspace(1)* undef
+  store volatile double %fortytwo.seven, ptr addrspace(1) undef
   ret void
 }

 define void @ldexp_f16() {
 ; CHECK-LABEL: @ldexp_f16(
-; CHECK-NEXT: store volatile half 0xH4000, half addrspace(1)* undef, align 2
-; CHECK-NEXT: store volatile half 0xH4400, half addrspace(1)* undef, align 2
-; CHECK-NEXT: store volatile half 0xH7C00, half addrspace(1)* undef, align 2
+; CHECK-NEXT: store volatile half 0xH4000, ptr addrspace(1) undef, align 2
+; CHECK-NEXT: store volatile half 0xH4400, ptr addrspace(1) undef, align 2
+; CHECK-NEXT: store volatile half 0xH7C00, ptr addrspace(1) undef, align 2
 ; CHECK-NEXT: ret void
 ;
   %one.one = call half @llvm.amdgcn.ldexp.f16(half 1.0, i32 1)
-  store volatile half %one.one, half addrspace(1)* undef
+  store volatile half %one.one, ptr addrspace(1) undef
   %one.two = call half @llvm.amdgcn.ldexp.f16(half 1.0, i32 2)
-  store volatile half %one.two, half addrspace(1)* undef
+  store volatile half %one.two, ptr addrspace(1) undef
   %fortytwo.seven = call half @llvm.amdgcn.ldexp.f16(half 42.0, i32 73)
-  store volatile half %fortytwo.seven, half addrspace(1)* undef
+  store volatile half %fortytwo.seven, ptr addrspace(1) undef
   ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/ARM/strcmp.ll b/llvm/test/Transforms/InstCombine/ARM/strcmp.ll
index 1fbcf1aba0a3c..cfcc767d3721a 100644
--- a/llvm/test/Transforms/InstCombine/ARM/strcmp.ll
+++ b/llvm/test/Transforms/InstCombine/ARM/strcmp.ll
@@ -9,33 +9,31 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 @bell = constant [5 x i8] c"bell\00"
 @null = constant [1 x i8] zeroinitializer

-declare i32 @strcmp(i8*, i8*)
+declare i32 @strcmp(ptr, ptr)

 ; strcmp("", x) -> -*x
-define arm_aapcscc i32 @test1(i8* %str2) {
+define arm_aapcscc i32 @test1(ptr %str2) {
 ; CHECK-LABEL: @test1(
-; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, i8* [[STR2:%.*]], align 1
+; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, ptr [[STR2:%.*]], align 1
 ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[STRCMPLOAD]] to i32
 ; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 0, [[TMP1]]
 ; CHECK-NEXT: ret i32 [[TMP2]]
 ;
-  %str1 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
-  %temp1 = call arm_apcscc i32 @strcmp(i8* %str1, i8* %str2)
+  %temp1 = call arm_apcscc i32 @strcmp(ptr @null, ptr %str2)
   ret i32 %temp1
 }

 ; strcmp(x, "") -> *x
-define arm_aapcscc i32 @test2(i8* %str1) {
+define arm_aapcscc i32 @test2(ptr %str1) {
 ; CHECK-LABEL: @test2(
-; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, i8* [[STR1:%.*]], align 1
+; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, ptr [[STR1:%.*]], align 1
 ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[STRCMPLOAD]] to i32
 ; CHECK-NEXT: ret i32 [[TMP1]]
 ;
-  %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
-  %temp1 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
+  %temp1 = call arm_aapcscc i32 @strcmp(ptr %str1, ptr @null)
   ret i32 %temp1
 }

@@ -45,9 +43,7 @@ define arm_aapcscc i32 @test3() {
 ; CHECK-NEXT: ret i32 -1
 ;
-  %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
-  %str2 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
-  %temp1 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
+  %temp1 = call arm_aapcscc i32 @strcmp(ptr @hell, ptr @hello)
   ret i32 %temp1
 }

@@ -56,9 +52,7 @@ define arm_aapcscc i32 @test4() {
 ; CHECK-NEXT: ret i32 1
 ;
-  %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
-  %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
-  %temp1 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
+  %temp1 = call arm_aapcscc i32 @strcmp(ptr @hell, ptr @null)
   ret i32 %temp1
 }

@@ -66,54 +60,49 @@ define arm_aapcscc i32 @test4() {
 ; (This transform is rather difficult to trigger in a useful manner)
 define arm_aapcscc i32 @test5(i1 %b) {
 ; CHECK-LABEL: @test5(
-; CHECK-NEXT: [[STR2:%.*]] = select i1 [[B:%.*]], i8* getelementptr inbounds ([5 x i8], [5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @bell, i32 0, i32 0)
-; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(5) getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* noundef nonnull dereferenceable(5) [[STR2]], i32 5)
+; CHECK-NEXT: [[STR2:%.*]] = select i1 [[B:%.*]], ptr @hell, ptr @bell
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr noundef nonnull dereferenceable(5) @hello, ptr noundef nonnull dereferenceable(5) [[STR2]], i32 5)
 ; CHECK-NEXT: ret i32 [[MEMCMP]]
 ;
-  %str1 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
-  %temp1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
-  %temp2 = getelementptr inbounds [5 x i8], [5 x i8]* @bell, i32 0, i32 0
-  %str2 = select i1 %b, i8* %temp1, i8* %temp2
-  %temp3 = call arm_aapcscc i32 @strcmp(i8* %str1, i8* %str2)
+  %str2 = select i1 %b, ptr @hell, ptr @bell
+  %temp3 = call arm_aapcscc i32 @strcmp(ptr @hello, ptr %str2)
   ret i32 %temp3
 }

 ; strcmp(x,x) -> 0
-define arm_aapcscc i32 @test6(i8* %str) {
+define arm_aapcscc i32 @test6(ptr %str) {
 ; CHECK-LABEL: @test6(
 ; CHECK-NEXT: ret i32 0
 ;
-  %temp1 = call arm_aapcscc i32 @strcmp(i8* %str, i8* %str)
+  %temp1 = call arm_aapcscc i32 @strcmp(ptr %str, ptr %str)
   ret i32 %temp1
 }

 ; strcmp("", x) -> -*x
-define arm_aapcs_vfpcc i32 @test1_vfp(i8* %str2) {
+define arm_aapcs_vfpcc i32 @test1_vfp(ptr %str2) {
 ; CHECK-LABEL: @test1_vfp(
-; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, i8* [[STR2:%.*]], align 1
+; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, ptr [[STR2:%.*]], align 1
 ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[STRCMPLOAD]] to i32
 ; CHECK-NEXT: [[TMP2:%.*]] = sub nsw i32 0, [[TMP1]]
 ; CHECK-NEXT: ret i32 [[TMP2]]
 ;
-  %str1 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
-  %temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
+  %temp1 = call arm_aapcs_vfpcc i32 @strcmp(ptr @null, ptr %str2)
   ret i32 %temp1
 }

 ; strcmp(x, "") -> *x
-define arm_aapcs_vfpcc i32 @test2_vfp(i8* %str1) {
+define arm_aapcs_vfpcc i32 @test2_vfp(ptr %str1) {
 ; CHECK-LABEL: @test2_vfp(
-; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, i8* [[STR1:%.*]], align 1
+; CHECK-NEXT: [[STRCMPLOAD:%.*]] = load i8, ptr [[STR1:%.*]], align 1
 ; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[STRCMPLOAD]] to i32
 ; CHECK-NEXT: ret i32 [[TMP1]]
 ;
-  %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
-  %temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
+  %temp1 = call arm_aapcs_vfpcc i32 @strcmp(ptr %str1, ptr @null)
   ret i32 %temp1
 }

@@ -123,9 +112,7 @@ define arm_aapcs_vfpcc i32 @test3_vfp() {
 ; CHECK-NEXT: ret i32 -1
 ;
-  %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
-  %str2 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
-  %temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
+  %temp1 = call arm_aapcs_vfpcc i32 @strcmp(ptr @hell, ptr @hello)
   ret i32 %temp1
 }

@@ -134,9 +121,7 @@ define arm_aapcs_vfpcc i32 @test4_vfp() {
 ; CHECK-NEXT: ret i32 1
 ;
-  %str1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
-  %str2 = getelementptr inbounds [1 x i8], [1 x i8]* @null, i32 0, i32 0
-  %temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
+  %temp1 = call arm_aapcs_vfpcc i32 @strcmp(ptr @hell, ptr @null)
   ret i32 %temp1
 }

@@ -144,25 +129,22 @@ define arm_aapcs_vfpcc i32 @test4_vfp() {
 ; (This transform is rather difficult to trigger in a useful manner)
 define arm_aapcs_vfpcc i32 @test5_vfp(i1 %b) {
 ; CHECK-LABEL: @test5_vfp(
-; CHECK-NEXT: [[STR2:%.*]] = select i1 [[B:%.*]], i8* getelementptr inbounds ([5 x i8], [5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8], [5 x i8]* @bell, i32 0, i32 0)
-; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(i8* noundef nonnull dereferenceable(5) getelementptr inbounds ([6 x i8], [6 x i8]* @hello, i32 0, i32 0), i8* noundef nonnull dereferenceable(5) [[STR2]], i32 5)
+; CHECK-NEXT: [[STR2:%.*]] = select i1 [[B:%.*]], ptr @hell, ptr @bell
+; CHECK-NEXT: [[MEMCMP:%.*]] = call i32 @memcmp(ptr noundef nonnull dereferenceable(5) @hello, ptr noundef nonnull dereferenceable(5) [[STR2]], i32 5)
 ; CHECK-NEXT: ret i32 [[MEMCMP]]
 ;
-  %str1 = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 0
-  %temp1 = getelementptr inbounds [5 x i8], [5 x i8]* @hell, i32 0, i32 0
-  %temp2 = getelementptr inbounds [5 x i8], [5 x i8]* @bell, i32 0, i32 0
-  %str2 = select i1 %b, i8* %temp1, i8* %temp2
-  %temp3 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str1, i8* %str2)
+  %str2 = select i1 %b, ptr @hell, ptr @bell
+  %temp3 = call arm_aapcs_vfpcc i32 @strcmp(ptr @hello, ptr %str2)
   ret i32 %temp3
 }

 ; strcmp(x,x) -> 0
-define arm_aapcs_vfpcc i32 @test6_vfp(i8* %str) {
+define arm_aapcs_vfpcc i32 @test6_vfp(ptr %str) {
 ; CHECK-LABEL: @test6_vfp(
 ; CHECK-NEXT: ret i32 0
 ;
-  %temp1 = call arm_aapcs_vfpcc i32 @strcmp(i8* %str, i8* %str)
+  %temp1 = call arm_aapcs_vfpcc i32 @strcmp(ptr %str, ptr %str)
   ret i32 %temp1
 }
diff --git a/llvm/test/Transforms/InstCombine/ARM/strcpy.ll b/llvm/test/Transforms/InstCombine/ARM/strcpy.ll
index 2b2a68b6fef4d..519abf2bf78dd 100644
--- a/llvm/test/Transforms/InstCombine/ARM/strcpy.ll
+++ b/llvm/test/Transforms/InstCombine/ARM/strcpy.ll
@@ -9,68 +9,58 @@ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 @a = common global [32 x i8] zeroinitializer, align 1
 @b = common global [32 x i8] zeroinitializer, align 1

-declare i8* @strcpy(i8*, i8*)
+declare ptr @strcpy(ptr, ptr)

 define arm_aapcscc void @test_simplify1() {
 ; CHECK-LABEL: @test_simplify1(
-  %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
-  %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
-  call arm_aapcscc i8* @strcpy(i8* %dst, i8* %src)
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32
+  call arm_aapcscc ptr @strcpy(ptr @a, ptr @hello)
+; CHECK: @llvm.memcpy.p0.p0.i32
   ret void
 }

-define arm_aapcscc i8* @test_simplify2() {
+define arm_aapcscc ptr @test_simplify2() {
 ; CHECK-LABEL: @test_simplify2(
-  %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
-  %ret = call arm_aapcscc i8* @strcpy(i8* %dst, i8* %dst)
-; CHECK: ret i8* getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0)
-  ret i8* %ret
+  %ret = call arm_aapcscc ptr @strcpy(ptr @a, ptr @a)
+; CHECK: ret ptr @a
+  ret ptr %ret
 }

-define arm_aapcscc i8* @test_no_simplify1() {
+define arm_aapcscc ptr @test_no_simplify1() {
 ; CHECK-LABEL: @test_no_simplify1(
-  %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
-  %src = getelementptr [32 x i8], [32 x i8]* @b, i32 0, i32 0
-  %ret = call arm_aapcscc i8* @strcpy(i8* %dst, i8* %src)
-; CHECK: call arm_aapcscc i8* @strcpy
-  ret i8* %ret
+  %ret = call arm_aapcscc ptr @strcpy(ptr @a, ptr @b)
+; CHECK: call arm_aapcscc ptr @strcpy
+  ret ptr %ret
 }

 define arm_aapcs_vfpcc void @test_simplify1_vfp() {
 ; CHECK-LABEL: @test_simplify1_vfp(
-  %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
-  %src = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
-  call arm_aapcs_vfpcc i8* @strcpy(i8* %dst, i8* %src)
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32
+  call arm_aapcs_vfpcc ptr @strcpy(ptr @a, ptr @hello)
+; CHECK: @llvm.memcpy.p0.p0.i32
   ret void
 }

-define arm_aapcs_vfpcc i8* @test_simplify2_vfp() {
+define arm_aapcs_vfpcc ptr @test_simplify2_vfp() {
 ; CHECK-LABEL: @test_simplify2_vfp(
-  %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
-  %ret = call arm_aapcs_vfpcc i8* @strcpy(i8* %dst, i8* %dst)
-; CHECK: ret i8* getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0)
-  ret i8* %ret
+  %ret = call arm_aapcs_vfpcc ptr @strcpy(ptr @a, ptr @a)
+; CHECK: ret ptr @a
+  ret ptr %ret
 }

-define arm_aapcs_vfpcc i8* @test_no_simplify1_vfp() {
+define arm_aapcs_vfpcc ptr @test_no_simplify1_vfp() {
 ; CHECK-LABEL: @test_no_simplify1_vfp(
-  %dst = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
-  %src = getelementptr [32 x i8], [32 x i8]* @b, i32 0, i32 0
-  %ret = call arm_aapcs_vfpcc i8* @strcpy(i8* %dst, i8* %src)
-; CHECK: call arm_aapcs_vfpcc i8* @strcpy
-  ret i8* %ret
+  %ret = call arm_aapcs_vfpcc ptr @strcpy(ptr @a, ptr @b)
+; CHECK: call arm_aapcs_vfpcc ptr @strcpy
+  ret ptr %ret
 }
diff --git a/llvm/test/Transforms/InstCombine/ARM/vld1.ll b/llvm/test/Transforms/InstCombine/ARM/vld1.ll
index 05fff9c323600..7b4817a5d5cfb 100644
--- a/llvm/test/Transforms/InstCombine/ARM/vld1.ll
+++ b/llvm/test/Transforms/InstCombine/ARM/vld1.ll
@@ -9,110 +9,102 @@ target triple = "armv8-arm-none-eabi"
 ; constant, since we get constant-folding for free.

 ; Bail the optimization if the alignment is not a constant.
-define <2 x i64> @vld1_align(i8* %ptr, i32 %align) {
+define <2 x i64> @vld1_align(ptr %ptr, i32 %align) {
 ; CHECK-LABEL: @vld1_align(
-; CHECK-NEXT: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[PTR:%.*]], i32 [[ALIGN:%.*]])
+; CHECK-NEXT: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr [[PTR:%.*]], i32 [[ALIGN:%.*]])
 ; CHECK-NEXT: ret <2 x i64> [[VLD1]]
 ;
-  %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* %ptr, i32 %align)
+  %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %ptr, i32 %align)
   ret <2 x i64> %vld1
 }

 ; Bail the optimization if the alignment is not power of 2.
-define <2 x i64> @vld1_align_pow2(i8* %ptr) {
+define <2 x i64> @vld1_align_pow2(ptr %ptr) {
 ; CHECK-LABEL: @vld1_align_pow2(
-; CHECK-NEXT: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[PTR:%.*]], i32 3)
+; CHECK-NEXT: [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr [[PTR:%.*]], i32 3)
 ; CHECK-NEXT: ret <2 x i64> [[VLD1]]
 ;
-  %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* %ptr, i32 3)
+  %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %ptr, i32 3)
   ret <2 x i64> %vld1
 }

-define <8 x i8> @vld1_8x8(i8* %ptr) {
+define <8 x i8> @vld1_8x8(ptr %ptr) {
 ; CHECK-LABEL: @vld1_8x8(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <8 x i8>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, <8 x i8>* [[TMP1]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[PTR:%.*]], align 1
 ; CHECK-NEXT: ret <8 x i8> [[TMP2]]
 ;
-  %vld1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %ptr, i32 1)
+  %vld1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr %ptr, i32 1)
   ret <8 x i8> %vld1
 }

-define <4 x i16> @vld1_4x16(i8* %ptr) {
+define <4 x i16> @vld1_4x16(ptr %ptr) {
 ; CHECK-LABEL: @vld1_4x16(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <4 x i16>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 2
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT: ret <4 x i16> [[TMP2]]
 ;
-  %vld1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* %ptr, i32 2)
+  %vld1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr %ptr, i32 2)
   ret <4 x i16> %vld1
 }

-define <2 x i32> @vld1_2x32(i8* %ptr) {
+define <2 x i32> @vld1_2x32(ptr %ptr) {
 ; CHECK-LABEL: @vld1_2x32(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <2 x i32>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, <2 x i32>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT: ret <2 x i32> [[TMP2]]
 ;
-  %vld1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* %ptr, i32 4)
+  %vld1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0(ptr %ptr, i32 4)
   ret <2 x i32> %vld1
 }

-define <1 x i64> @vld1_1x64(i8* %ptr) {
+define <1 x i64> @vld1_1x64(ptr %ptr) {
 ; CHECK-LABEL: @vld1_1x64(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <1 x i64>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[PTR:%.*]], align 8
 ; CHECK-NEXT: ret <1 x i64> [[TMP2]]
 ;
-  %vld1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* %ptr, i32 8)
+  %vld1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr %ptr, i32 8)
   ret <1 x i64> %vld1
 }

-define <8 x i16> @vld1_8x16(i8* %ptr) {
+define <8 x i16> @vld1_8x16(ptr %ptr) {
 ; CHECK-LABEL: @vld1_8x16(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <8 x i16>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[PTR:%.*]], align 2
 ; CHECK-NEXT: ret <8 x i16> [[TMP2]]
 ;
-  %vld1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* %ptr, i32 2)
+  %vld1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr %ptr, i32 2)
   ret <8 x i16> %vld1
 }

-define <16 x i8> @vld1_16x8(i8* %ptr) {
+define <16 x i8> @vld1_16x8(ptr %ptr) {
 ; CHECK-LABEL: @vld1_16x8(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <16 x i8>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* [[TMP1]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
 ; CHECK-NEXT: ret <16 x i8> [[TMP2]]
 ;
-  %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %ptr, i32 1)
+  %vld1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr %ptr, i32 1)
   ret <16 x i8> %vld1
 }

-define <4 x i32> @vld1_4x32(i8* %ptr) {
+define <4 x i32> @vld1_4x32(ptr %ptr) {
 ; CHECK-LABEL: @vld1_4x32(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <4 x i32>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT: ret <4 x i32> [[TMP2]]
 ;
-  %vld1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* %ptr, i32 4)
+  %vld1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr %ptr, i32 4)
   ret <4 x i32> %vld1
 }

-define <2 x i64> @vld1_2x64(i8* %ptr) {
+define <2 x i64> @vld1_2x64(ptr %ptr) {
 ; CHECK-LABEL: @vld1_2x64(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[PTR:%.*]] to <2 x i64>*
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[PTR:%.*]], align 8
 ; CHECK-NEXT: ret <2 x i64> [[TMP2]]
 ;
-  %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* %ptr, i32 8)
+  %vld1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr %ptr, i32 8)
   ret <2 x i64> %vld1
 }

-declare <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8*, i32)
-declare <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8*, i32)
-declare <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8*, i32)
-declare <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8*, i32)
-declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8*, i32)
-declare <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8*, i32)
-declare <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8*, i32)
-declare <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8*, i32)
+declare <8 x i8> @llvm.arm.neon.vld1.v8i8.p0(ptr, i32)
+declare <4 x i16> @llvm.arm.neon.vld1.v4i16.p0(ptr, i32)
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32.p0(ptr, i32)
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64.p0(ptr, i32)
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16.p0(ptr, i32)
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8.p0(ptr, i32)
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32.p0(ptr, i32)
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64.p0(ptr, i32)
diff --git a/llvm/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll b/llvm/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll
index ab991f472031a..4f53e1d0411dc 100644
--- a/llvm/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll
+++ b/llvm/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll
@@ -2,47 +2,44 @@ target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"

-declare <4 x i32> @llvm.ppc.altivec.lvx(i8*) #1
+declare <4 x i32> @llvm.ppc.altivec.lvx(ptr) #1

-define <4 x i32> @test1(<4 x i32>* %h) #0 {
+define <4 x i32> @test1(ptr %h) #0 {
 entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  %vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv)
+  %h1 = getelementptr <4 x i32>, ptr %h, i64 1
+  %vl = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %h1)

 ; CHECK-LABEL: @test1
 ; CHECK: @llvm.ppc.altivec.lvx
 ; CHECK: ret <4 x i32>

-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, ptr %h, align 8
   %a = add <4 x i32> %v0, %vl
   ret <4 x i32> %a
 }

-define <4 x i32> @test1a(<4 x i32>* align 16 %h) #0 {
+define <4 x i32> @test1a(ptr align 16 %h) #0 {
 entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  %vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv)
+  %h1 = getelementptr <4 x i32>, ptr %h, i64 1
+  %vl = call <4 x i32> @llvm.ppc.altivec.lvx(ptr %h1)

 ; CHECK-LABEL: @test1a
 ; CHECK-NOT: @llvm.ppc.altivec.lvx
 ; CHECK: ret <4 x i32>

-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, ptr %h, align 8
   %a = add <4 x i32> %v0, %vl
   ret <4 x i32> %a
 }

-declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*) #0
+declare void @llvm.ppc.altivec.stvx(<4 x i32>, ptr) #0

-define <4 x i32> @test2(<4 x i32>* %h, <4 x i32> %d) #0 {
+define <4 x i32> @test2(ptr %h, <4 x i32> %d) #0 {
 entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
+  %h1 = getelementptr <4 x i32>, ptr %h, i64 1
+  call void @llvm.ppc.altivec.stvx(<4 x i32> %d, ptr %h1)

-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, ptr %h, align 8
   ret <4 x i32> %v0

 ; CHECK-LABEL: @test2
@@ -50,13 +47,12 @@ entry:
 ; CHECK: ret <4 x i32>
 }

-define <4 x i32> @test2a(<4 x i32>* align 16 %h, <4 x i32> %d) #0 {
+define <4 x i32> @test2a(ptr align 16 %h, <4 x i32> %d) #0 {
 entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
+  %h1 = getelementptr <4 x i32>, ptr %h, i64 1
+  call void @llvm.ppc.altivec.stvx(<4 x i32> %d, ptr %h1)

-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, ptr %h, align 8
   ret <4 x i32> %v0

 ; CHECK-LABEL: @test2
@@ -64,47 +60,44 @@ entry:
 ; CHECK: ret <4 x i32>
 }

-declare <4 x i32> @llvm.ppc.altivec.lvxl(i8*) #1
+declare <4 x i32> @llvm.ppc.altivec.lvxl(ptr) #1

-define <4 x i32> @test1l(<4 x i32>* %h) #0 {
+define <4 x i32> @test1l(ptr %h) #0 {
 entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  %vl = call <4 x i32> @llvm.ppc.altivec.lvxl(i8* %hv)
+  %h1 = getelementptr <4 x i32>, ptr %h, i64 1
+  %vl = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %h1)

 ; CHECK-LABEL: @test1l
 ; CHECK: @llvm.ppc.altivec.lvxl
 ; CHECK: ret <4 x i32>

-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, ptr %h, align 8
   %a = add <4 x i32> %v0, %vl
   ret <4 x i32> %a
 }

-define <4 x i32> @test1la(<4 x i32>* align 16 %h) #0 {
+define <4 x i32> @test1la(ptr align 16 %h) #0 {
 entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  %vl = call <4 x i32> @llvm.ppc.altivec.lvxl(i8* %hv)
+  %h1 = getelementptr <4 x i32>, ptr %h, i64 1
+  %vl = call <4 x i32> @llvm.ppc.altivec.lvxl(ptr %h1)

 ; CHECK-LABEL: @test1la
 ; CHECK-NOT: @llvm.ppc.altivec.lvxl
 ; CHECK: ret <4 x i32>

-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, ptr %h, align 8
   %a = add <4 x i32> %v0, %vl
   ret <4 x i32> %a
 }

-declare void @llvm.ppc.altivec.stvxl(<4 x i32>, i8*) #0
+declare void @llvm.ppc.altivec.stvxl(<4 x i32>, ptr) #0

-define <4 x i32> @test2l(<4 x i32>* %h, <4 x i32> %d) #0 {
+define <4 x i32> @test2l(ptr %h, <4 x i32> %d) #0 {
 entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, i8* %hv)
+  %h1 = getelementptr <4 x i32>, ptr %h, i64 1
+  call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, ptr %h1)

-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, ptr %h, align 8
   ret <4 x i32> %v0

 ; CHECK-LABEL: @test2l
@@ -112,13 +105,12 @@ entry:
 ; CHECK: ret <4 x i32>
 }

-define <4 x i32> @test2la(<4 x i32>* align 16 %h, <4 x i32> %d) #0 {
+define <4 x i32> @test2la(ptr align 16 %h, <4 x i32> %d) #0 {
 entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, i8* %hv)
+  %h1 = getelementptr <4 x i32>, ptr %h, i64 1
+  call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, ptr %h1)

-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
+  %v0 = load <4 x i32>, ptr %h, align 8
   ret <4 x i32> %v0

 ; CHECK-LABEL: @test2l
diff --git a/llvm/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll b/llvm/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll
index dbf501ed381b2..30ea01dc573c3 100644
--- a/llvm/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll
+++ b/llvm/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll
@@ -11,34 +11,30 @@ target triple = "powerpc64-unknown-linux-gnu"

 define void @test1() {
 entry:
-  %t1 = alloca <4 x float>*, align 8
-  %t2 = alloca <2 x double>*, align 8
-  store <4 x float>* @vf, <4 x float>** %t1, align 8
-  %0 = load <4 x float>*, <4 x float>** %t1, align 8
-  %1 = bitcast <4 x float>* %0 to i8*
-  %2 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %1)
-  store <4 x float>* @res_vf, <4 x float>** %t1, align 8
-  %3 = load <4 x float>*, <4 x float>** %t1, align 8
-  %4 = bitcast <4 x float>* %3 to i8*
-  call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %2, i8* %4)
-  store <2 x double>* @vd, <2 x double>** %t2, align 8
-  %5 = load <2 x double>*, <2 x double>** %t2, align 8
-  %6 = bitcast <2 x double>* %5 to i8*
-  %7 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %6)
-  store <2 x double>* @res_vd, <2 x double>** %t2, align 8
-  %8 = load <2 x double>*, <2 x double>** %t2, align 8
-  %9 = bitcast <2 x double>* %8 to i8*
-  call void @llvm.ppc.vsx.stxvd2x(<2 x double> %7, i8* %9)
+  %t1 = alloca ptr, align 8
+  %t2 = alloca ptr, align 8
+  store ptr @vf, ptr %t1, align 8
+  %0 = load ptr, ptr %t1, align 8
+  %1 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr %0)
+  store ptr @res_vf, ptr %t1, align 8
+  %2 = load ptr, ptr %t1, align 8
+  call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %1, ptr %2)
+  store ptr @vd, ptr %t2, align 8
+  %3 = load ptr, ptr %t2, align 8
+  %4 = call <2 x double> @llvm.ppc.vsx.lxvd2x(ptr %3)
+  store ptr @res_vd, ptr %t2, align 8
+  %5 = load ptr, ptr %t2, align 8
+  call void @llvm.ppc.vsx.stxvd2x(<2 x double> %4, ptr %5)
   ret void
 }

 ; CHECK-LABEL: @test1
-; CHECK: %0 = load <4 x i32>, <4 x i32>* bitcast (<4 x float>* @vf to <4 x i32>*), align 1
-; CHECK: store <4 x i32> %0, <4 x i32>* bitcast (<4 x float>* @res_vf to <4 x i32>*), align 1
-; CHECK: %1 = load <2 x double>, <2 x double>* @vd, align 1
-; CHECK: store <2 x double> %1, <2 x double>* @res_vd, align 1
+; CHECK: %0 = load <4 x i32>, ptr @vf, align 1
+; CHECK: store <4 x i32> %0, ptr @res_vf, align 1
+; CHECK: %1 = load <2 x double>, ptr @vd, align 1
+; CHECK: store <2 x double> %1, ptr @res_vd, align 1

-declare <4 x i32> @llvm.ppc.vsx.lxvw4x(i8*)
-declare void @llvm.ppc.vsx.stxvw4x(<4 x i32>, i8*)
-declare <2 x double> @llvm.ppc.vsx.lxvd2x(i8*)
-declare void @llvm.ppc.vsx.stxvd2x(<2 x double>, i8*)
+declare <4 x i32> @llvm.ppc.vsx.lxvw4x(ptr)
+declare void @llvm.ppc.vsx.stxvw4x(<4 x i32>, ptr)
+declare <2 x double> @llvm.ppc.vsx.lxvd2x(ptr)
+declare void @llvm.ppc.vsx.stxvd2x(<2 x double>, ptr)
diff --git a/llvm/test/Transforms/InstCombine/SystemZ/libcall-arg-exts.ll b/llvm/test/Transforms/InstCombine/SystemZ/libcall-arg-exts.ll
index dbd92044f9d7b..7d3645681e2d8 100644
--- a/llvm/test/Transforms/InstCombine/SystemZ/libcall-arg-exts.ll
+++ b/llvm/test/Transforms/InstCombine/SystemZ/libcall-arg-exts.ll
@@ -34,65 +34,59 @@ define fp128 @fun3(i8 zeroext %x) {
 @a = common global [60 x i8] zeroinitializer, align 1
 @b = common global [60 x i8] zeroinitializer, align 1

-declare i8* @__memccpy_chk(i8*, i8*, i32, i64, i64)
-define i8* @fun4() {
+declare ptr @__memccpy_chk(ptr, ptr, i32, i64, i64)
+define ptr @fun4() {
 ; CHECK-LABEL: @fun4
-; CHECK: call i8* @memccpy
-  %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0
-  %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0
-  %ret = call i8* @__memccpy_chk(i8* %dst, i8* %src, i32 0, i64 60, i64 -1)
-  ret i8* %ret
+; CHECK: call ptr @memccpy
+  %ret = call ptr @__memccpy_chk(ptr @a, ptr @b, i32 0, i64 60, i64 -1)
+  ret ptr %ret
 }

 %FILE = type { }
 @A = constant [2 x i8] c"A\00"
-declare i32 @fputs(i8*, %FILE*)
-define void @fun5(%FILE* %fp) {
+declare i32 @fputs(ptr, ptr)
+define void @fun5(ptr %fp) {
 ; CHECK-LABEL: @fun5
 ; CHECK: call i32 @fputc
-  %str = getelementptr [2 x i8], [2 x i8]* @A, i32 0, i32 0
-  call i32 @fputs(i8* %str, %FILE* %fp)
+  call i32 @fputs(ptr @A, ptr %fp)
   ret void
 }

 @empty = constant [1 x i8] zeroinitializer
-declare i32 @puts(i8*)
+declare i32 @puts(ptr)
 define void @fun6() {
 ; CHECK-LABEL: @fun6
 ; CHECK: call i32 @putchar
-  %str = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0
-  call i32 @puts(i8* %str)
+  call i32 @puts(ptr @empty)
   ret void
 }

 @.str1 = private constant [2 x i8] c"a\00"
-declare i8* @strstr(i8*, i8*)
-define i8* @fun7(i8* %str) {
+declare ptr @strstr(ptr, ptr)
+define ptr @fun7(ptr %str) {
 ; CHECK-LABEL: @fun7
-; CHECK: call i8* @strchr
-  %pat = getelementptr inbounds [2 x i8], [2 x i8]* @.str1, i32 0, i32 0
-  %ret = call i8* @strstr(i8* %str, i8* %pat)
-  ret i8* %ret
+; CHECK: call ptr @strchr
+  %ret = call ptr @strstr(ptr %str, ptr @.str1)
+  ret ptr %ret
 }
-; CHECK: declare i8* @strchr(i8*, i32 signext)
+; CHECK: declare ptr @strchr(ptr, i32 signext)

 @hello = constant [14 x i8] c"hello world\5Cn\00"
-@chp = global i8* zeroinitializer
-declare i8* @strchr(i8*, i32)
+@chp = global ptr zeroinitializer
+declare ptr @strchr(ptr, i32)
 define void @fun8(i32 %chr) {
 ; CHECK-LABEL: @fun8
-; CHECK: call i8* @memchr
-  %src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
-  %dst = call i8* @strchr(i8* %src, i32 %chr)
-  store i8* %dst, i8** @chp
+; CHECK: call ptr @memchr
+  %dst = call ptr @strchr(ptr @hello, i32 %chr)
+  store ptr %dst, ptr @chp
   ret void
 }

 ; CHECK: declare double @ldexp(double, i32 signext)
 ; CHECK: declare float @ldexpf(float, i32 signext)
 ; CHECK: declare fp128 @ldexpl(fp128, i32 signext)
-; CHECK: declare i8* @memccpy(i8* noalias writeonly, i8* noalias nocapture readonly, i32 signext, i64)
-; CHECK: declare noundef i32 @fputc(i32 noundef signext, %FILE* nocapture noundef)
+; CHECK: declare ptr @memccpy(ptr noalias writeonly, ptr noalias nocapture readonly, i32 signext, i64)
+; CHECK: declare noundef i32 @fputc(i32 noundef signext, ptr nocapture noundef)
 ; CHECK: declare noundef i32 @putchar(i32 noundef signext)
-; CHECK: declare i8* @memchr(i8*, i32 signext, i64)
+; CHECK: declare ptr @memchr(ptr, i32 signext, i64)
diff --git a/llvm/test/Transforms/InstCombine/X86/addcarry.ll b/llvm/test/Transforms/InstCombine/X86/addcarry.ll
index 8260f05bdacc4..b9530695efcee 100644
--- a/llvm/test/Transforms/InstCombine/X86/addcarry.ll
+++ b/llvm/test/Transforms/InstCombine/X86/addcarry.ll
@@ -4,34 +4,34 @@
 declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32)
 declare { i8, i64 } @llvm.x86.addcarry.64(i8, i64, i64)

-define i32 @no_carryin_i32(i32 %x, i32 %y, i8* %p) {
+define i32 @no_carryin_i32(i32 %x, i32 %y, ptr %p) {
 ; CHECK-LABEL: @no_carryin_i32(
 ; CHECK-NEXT: [[TMP1:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
 ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 0
 ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
 ; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i8
-; CHECK-NEXT: store i8 [[TMP4]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: store i8 [[TMP4]], ptr [[P:%.*]], align 1
 ; CHECK-NEXT: ret i32 [[TMP2]]
 ;
   %s = call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %x, i32 %y)
   %ov = extractvalue { i8, i32 } %s, 0
-  store i8 %ov, i8* %p
+  store i8 %ov, ptr %p
   %r = extractvalue { i8, i32 } %s, 1
   ret i32 %r
 }

-define i64 @no_carryin_i64(i64 %x, i64 %y, i8* %p) {
+define i64 @no_carryin_i64(i64 %x, i64 %y, ptr %p) {
 ; CHECK-LABEL: @no_carryin_i64(
 ; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
 ; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
 ; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
 ; CHECK-NEXT: [[TMP4:%.*]] = zext i1 [[TMP3]] to i8
-; CHECK-NEXT: store i8 [[TMP4]], i8* [[P:%.*]], align 1
+; CHECK-NEXT: store i8 [[TMP4]], ptr [[P:%.*]], align 1
 ; CHECK-NEXT: ret i64 [[TMP2]]
 ;
   %s = call { i8, i64 } @llvm.x86.addcarry.64(i8 0, i64 %x, i64 %y)
   %ov = extractvalue { i8, i64 } %s, 0
-  store i8 %ov, i8* %p
+  store i8 %ov, ptr %p
   %r = extractvalue { i8, i64 } %s, 1
   ret i64 %r
 }
diff --git a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
index 77859070aa9f5..f12cc1560c0a4 100644
--- a/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
+++ b/llvm/test/Transforms/InstCombine/X86/blend_x86.ll
@@ -209,14 +209,14 @@ define <16 x i8> @sel_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i1> %cond) {
 ; expected IR when 1 of the blend operands is a constant 0 vector. Potentially, this could
 ; be transformed to bitwise logic in IR, but currently that transform is left to the backend.
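(For context on the comment above: the bitwise-logic form it alludes to is the identity select(c, 0, x) == x & ~sext(c). A hedged sketch in plain IR — the function name @zero_blend_as_logic is illustrative, not part of this patch:

define <4 x i32> @zero_blend_as_logic(<4 x i1> %c, <4 x i32> %x) {
  ; sext turns each i1 lane into an all-ones (-1) or all-zeros mask
  %m = sext <4 x i1> %c to <4 x i32>
  ; invert the mask, then AND: true lanes become 0, false lanes keep %x
  %not = xor <4 x i32> %m, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = and <4 x i32> %x, %not
  ret <4 x i32> %r
}

This matches select <4 x i1> %c, <4 x i32> zeroinitializer, <4 x i32> %x lane for lane, which is the IR form the CHECK lines below expect.)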
-define <4 x float> @sel_v4f32_sse_reality(<4 x float>* %x, <4 x float> %y, <4 x float> %z) {
+define <4 x float> @sel_v4f32_sse_reality(ptr %x, <4 x float> %y, <4 x float> %z) {
 ; CHECK-LABEL: @sel_v4f32_sse_reality(
-; CHECK-NEXT: [[LD:%.*]] = load <4 x float>, <4 x float>* [[X:%.*]], align 16
+; CHECK-NEXT: [[LD:%.*]] = load <4 x float>, ptr [[X:%.*]], align 16
 ; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> [[Z:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[R:%.*]] = select <4 x i1> [[CMP]], <4 x float> zeroinitializer, <4 x float> [[LD]]
 ; CHECK-NEXT: ret <4 x float> [[R]]
 ;
-  %ld = load <4 x float>, <4 x float>* %x, align 16
+  %ld = load <4 x float>, ptr %x, align 16
   %cmp = fcmp olt <4 x float> %z, %y
   %sext = sext <4 x i1> %cmp to <4 x i32>
   %cond = bitcast <4 x i32> %sext to <4 x float>
@@ -224,14 +224,14 @@ define <4 x float> @sel_v4f32_sse_reality(<4 x float>* %x, <4 x float> %y, <4 x
   ret <4 x float> %r
 }

-define <2 x double> @sel_v2f64_sse_reality(<2 x double>* nocapture readonly %x, <2 x double> %y, <2 x double> %z) {
+define <2 x double> @sel_v2f64_sse_reality(ptr nocapture readonly %x, <2 x double> %y, <2 x double> %z) {
 ; CHECK-LABEL: @sel_v2f64_sse_reality(
-; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, <2 x double>* [[X:%.*]], align 16
+; CHECK-NEXT: [[LD:%.*]] = load <2 x double>, ptr [[X:%.*]], align 16
 ; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <2 x double> [[Z:%.*]], [[Y:%.*]]
 ; CHECK-NEXT: [[R:%.*]] = select <2 x i1> [[CMP]], <2 x double> zeroinitializer, <2 x double> [[LD]]
 ; CHECK-NEXT: ret <2 x double> [[R]]
 ;
-  %ld = load <2 x double>, <2 x double>* %x, align 16
+  %ld = load <2 x double>, ptr %x, align 16
   %cmp = fcmp olt <2 x double> %z, %y
   %sext = sext <2 x i1> %cmp to <2 x i64>
   %cond = bitcast <2 x i64> %sext to <2 x double>
@@ -241,10 +241,9 @@ define <2 x double> @sel_v2f64_sse_reality(<2 x double>* nocapture readonly %x,

 ; Bitcast the inputs and the result and remove the intrinsic.
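(A sketch of the pattern the comment describes, i.e. a blendv-style intrinsic whose mask is a sign-extended compare being replaced by a generic select; this assumes the SSE4.1 pblendvb intrinsic, and @blendv_as_select is an illustrative name, not from this patch:

declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)

define <16 x i8> @blendv_as_select(<16 x i8> %x, <16 x i8> %y, <16 x i1> %c) {
  ; each mask lane is all-ones or all-zeros, so only the sign bit matters
  %m = sext <16 x i1> %c to <16 x i8>
  ; pblendvb picks %y where the mask sign bit is set, else %x; instcombine
  ; can rewrite this as: select <16 x i1> %c, <16 x i8> %y, <16 x i8> %x
  %r = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %x, <16 x i8> %y, <16 x i8> %m)
  ret <16 x i8> %r
}
)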
-define <2 x i64> @sel_v4i32_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
+define <2 x i64> @sel_v4i32_sse_reality(ptr nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
 ; CHECK-LABEL: @sel_v4i32_sse_reality(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64>* [[X:%.*]] to <4 x i32>*
-; CHECK-NEXT: [[LD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
+; CHECK-NEXT: [[LD1:%.*]] = load <4 x i32>, ptr [[X:%.*]], align 16
 ; CHECK-NEXT: [[YCAST:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <4 x i32>
 ; CHECK-NEXT: [[ZCAST:%.*]] = bitcast <2 x i64> [[Z:%.*]] to <4 x i32>
 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i32> [[YCAST]], [[ZCAST]]
@@ -252,8 +251,7 @@ define <2 x i64> @sel_v4i32_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i
 ; CHECK-NEXT: [[RCAST:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
 ; CHECK-NEXT: ret <2 x i64> [[RCAST]]
 ;
-  %xcast = bitcast <2 x i64>* %x to <16 x i8>*
-  %ld = load <16 x i8>, <16 x i8>* %xcast, align 16
+  %ld = load <16 x i8>, ptr %x, align 16
   %ycast = bitcast <2 x i64> %y to <4 x i32>
   %zcast = bitcast <2 x i64> %z to <4 x i32>
   %cmp = icmp sgt <4 x i32> %ycast, %zcast
@@ -264,10 +262,9 @@ define <2 x i64> @sel_v4i32_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i
   ret <2 x i64> %rcast
 }

-define <2 x i64> @sel_v16i8_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
+define <2 x i64> @sel_v16i8_sse_reality(ptr nocapture readonly %x, <2 x i64> %y, <2 x i64> %z) {
 ; CHECK-LABEL: @sel_v16i8_sse_reality(
-; CHECK-NEXT: [[XCAST:%.*]] = bitcast <2 x i64>* [[X:%.*]] to <16 x i8>*
-; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, <16 x i8>* [[XCAST]], align 16
+; CHECK-NEXT: [[LD:%.*]] = load <16 x i8>, ptr [[X:%.*]], align 16
 ; CHECK-NEXT: [[YCAST:%.*]] = bitcast <2 x i64> [[Y:%.*]] to <16 x i8>
 ; CHECK-NEXT: [[ZCAST:%.*]] = bitcast <2 x i64> [[Z:%.*]] to <16 x i8>
 ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <16 x i8> [[YCAST]], [[ZCAST]]
@@ -275,8 +272,7 @@ define <2 x i64> @sel_v16i8_sse_reality(<2 x i64>* nocapture readonly %x, <2 x i
 ; CHECK-NEXT: [[RCAST:%.*]] = bitcast <16 x i8> [[R]] to <2 x i64>
 ; CHECK-NEXT: ret <2 x i64> [[RCAST]]
 ;
-  %xcast = bitcast <2 x i64>* %x to <16 x i8>*
-  %ld = load <16 x i8>, <16 x i8>* %xcast, align 16
+  %ld = load <16 x i8>, ptr %x, align 16
   %ycast = bitcast <2 x i64> %y to <16 x i8>
   %zcast = bitcast <2 x i64> %z to <16 x i8>
   %cmp = icmp sgt <16 x i8> %ycast, %zcast
diff --git a/llvm/test/Transforms/InstCombine/X86/shufflemask-undef-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/shufflemask-undef-inseltpoison.ll
index 0fbfee1205fb6..6ea2403962bb9 100644
--- a/llvm/test/Transforms/InstCombine/X86/shufflemask-undef-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/X86/shufflemask-undef-inseltpoison.ll
@@ -10,13 +10,13 @@ target triple = "i386-apple-darwin9"
   %struct.ClearColor = type { double, %struct.IColor4, %struct.IColor4, float, i32 }
   %struct.ClipPlane = type { i32, [6 x %struct.IColor4] }
   %struct.ColorBuffer = type { i16, i8, i8, [8 x i16], [0 x i32] }
-  %struct.ColorMatrix = type { [16 x float]*, %struct.ImagingColorScale }
-  %struct.Convolution = type { %struct.IColor4, %struct.ImagingColorScale, i16, i16, [0 x i32], float*, i32, i32 }
+  %struct.ColorMatrix = type { ptr, %struct.ImagingColorScale }
+  %struct.Convolution = type { %struct.IColor4, %struct.ImagingColorScale, i16, i16, [0 x i32], ptr, i32, i32 }
   %struct.DepthTest = type { i16, i16, i8, i8, i8, i8, double, double }
-  %struct.FixedFunction = type { %struct.PPStreamToken* }
+  %struct.FixedFunction = type { ptr }
   %struct.FogMode = type { %struct.IColor4, float, float, float, float, float, i16, i16, i16, i8, i8 }
   %struct.HintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 }
-  %struct.Histogram = type { %struct.ProgramLimits*, i32, i16, i8, i8 }
+  %struct.Histogram = type { ptr, i32, i16, i8, i8 }
   %struct.ImagingColorScale = type { %struct.TCoord2, %struct.TCoord2, %struct.TCoord2, %struct.TCoord2 }
   %struct.ImagingSubset = type { %struct.Convolution, %struct.Convolution, %struct.Convolution, %struct.ColorMatrix, %struct.Minmax, %struct.Histogram, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, i32, [0 x i32] }
   %struct.Light = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.PointLineLimits, float, float, float, float, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, float, float, float, float }
@@ -26,12 +26,12 @@ target triple = "i386-apple-darwin9"
   %struct.LogicOp = type { i16, i8, i8 }
   %struct.MaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 }
   %struct.Material = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, float, float, float, float, [8 x %struct.LightProduct], %struct.IColor4, [8 x i32] }
-  %struct.Minmax = type { %struct.MinmaxTable*, i16, i8, i8, [0 x i32] }
+  %struct.Minmax = type { ptr, i16, i8, i8, [0 x i32] }
   %struct.MinmaxTable = type { %struct.IColor4, %struct.IColor4 }
-  %struct.Mipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, float*, i8*, i16, i16, i16, i16, [2 x float] }
+  %struct.Mipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, ptr, ptr, i16, i16, i16, i16, [2 x float] }
   %struct.Multisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 }
-  %struct.PipelineProgramState = type { i8, i8, i8, i8, [0 x i32], %struct.IColor4* }
-  %struct.PixelMap = type { i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+  %struct.PipelineProgramState = type { i8, i8, i8, i8, [0 x i32], ptr }
+  %struct.PixelMap = type { ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
   %struct.PixelMode = type { float, float, %struct.PixelStore, %struct.PixelTransfer, %struct.PixelMap, %struct.ImagingSubset, i32, i32 }
   %struct.PixelPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 }
   %struct.PixelStore = type { %struct.PixelPack, %struct.PixelPack }
@@ -46,32 +46,32 @@ target triple = "i386-apple-darwin9"
   %struct.RegisterCombinersPerPortionState = type { [4 x %struct.RegisterCombinersPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 }
   %struct.RegisterCombinersPerStageState = type { [2 x %struct.RegisterCombinersPerPortionState], [2 x %struct.IColor4] }
   %struct.RegisterCombinersPerVariableState = type { i16, i16, i16, i16 }
-  %struct.SWRSurfaceRec = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i8*, [4 x i8*], i32 }
+  %struct.SWRSurfaceRec = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, ptr, ptr, ptr, [4 x ptr], i32 }
   %struct.ScissorTest = type { %struct.ProgramLimits, i8, i8, i8, i8 }
-  %struct.State = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.IColor4], [128 x %struct.IColor4], %struct.Viewport, %struct.Transform, %struct.LightModel, %struct.ActiveTextureTargets, %struct.AlphaTest, %struct.BlendMode, %struct.ClearColor, %struct.ColorBuffer, %struct.DepthTest, %struct.ArrayRange, %struct.FogMode, %struct.HintMode, %struct.LineMode, %struct.LogicOp, %struct.MaskMode, %struct.PixelMode, %struct.PointMode, %struct.PolygonMode, %struct.ScissorTest, i32, %struct.StencilTest, [8 x %struct.TextureMode], [16 x %struct.TextureImageMode], %struct.ArrayRange, [8 x %struct.TextureCoordGen], %struct.ClipPlane, %struct.Multisample, %struct.RegisterCombiners, %struct.ArrayRange, %struct.ArrayRange, [3 x %struct.PipelineProgramState], %struct.ArrayRange, %struct.TransformFeedback, i32*, %struct.FixedFunction, [3 x i32], [3 x i32] }>
+  %struct.State = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.IColor4], [128 x %struct.IColor4], %struct.Viewport, %struct.Transform, %struct.LightModel, %struct.ActiveTextureTargets, %struct.AlphaTest, %struct.BlendMode, %struct.ClearColor, %struct.ColorBuffer, %struct.DepthTest, %struct.ArrayRange, %struct.FogMode, %struct.HintMode, %struct.LineMode, %struct.LogicOp, %struct.MaskMode, %struct.PixelMode, %struct.PointMode, %struct.PolygonMode, %struct.ScissorTest, i32, %struct.StencilTest, [8 x %struct.TextureMode], [16 x %struct.TextureImageMode], %struct.ArrayRange, [8 x %struct.TextureCoordGen], %struct.ClipPlane, %struct.Multisample, %struct.RegisterCombiners, %struct.ArrayRange, %struct.ArrayRange, [3 x %struct.PipelineProgramState], %struct.ArrayRange, %struct.TransformFeedback, ptr, %struct.FixedFunction, [3 x i32], [3 x i32] }>
   %struct.StencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] }
   %struct.TextureCoordGen = type { { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, i8, i8, i8, i8 }
   %struct.TextureGeomState = type { i16, i16, i16, i16, i16, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [6 x i16], [6 x i16] }
   %struct.TextureImageMode = type { float }
-  %struct.TextureLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, i8* }
+  %struct.TextureLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, ptr }
   %struct.TextureMode = type { %struct.IColor4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float }
-  %struct.TextureParamState = type { i16, i16, i16, i16, i16, i16, %struct.IColor4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, i8* }
-  %struct.TextureRec = type { [4 x float], %struct.TextureState*, %struct.Mipmaplevel*, %struct.Mipmaplevel*, float, float, float, float, i8, i8, i8, i8, i16, i16, i16, i16, i32, float, [2 x %struct.PPStreamToken] }
-  %struct.TextureState = type { i16, i8, i8, i16, i16, float, i32, %struct.SWRSurfaceRec*, %struct.TextureParamState, %struct.TextureGeomState, [0 x i32], i8*, i32, %struct.TextureLevel, [1 x [15 x %struct.TextureLevel]] }
+  %struct.TextureParamState = type { i16, i16, i16, i16, i16, i16, %struct.IColor4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, ptr }
+  %struct.TextureRec = type { [4 x float], ptr, ptr, ptr, float, float, float, float, i8, i8, i8, i8, i16, i16, i16, i16, i32, float, [2 x %struct.PPStreamToken] }
+  %struct.TextureState = type { i16, i8, i8, i16, i16, float, i32, ptr, %struct.TextureParamState, %struct.TextureGeomState, [0 x i32], ptr, i32, %struct.TextureLevel, [1 x [15 x %struct.TextureLevel]] }
%struct.Transform = type <{ [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 }> %struct.TransformFeedback = type { i8, i8, i8, i8, [0 x i32], [16 x i32], [16 x i32] } %struct.Viewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float } %struct.IColor4 = type { float, float, float, float } %struct.TCoord2 = type { float, float } - %struct.VMGPStack = type { [6 x <4 x float>*], <4 x float>*, i32, i32, <4 x float>*, <4 x float>**, i32, i32, i32, i32, i32, i32 } - %struct.VMTextures = type { [16 x %struct.TextureRec*] } + %struct.VMGPStack = type { [6 x ptr], ptr, i32, i32, ptr, ptr, i32, i32, i32, i32, i32, i32 } + %struct.VMTextures = type { [16 x ptr] } %struct.PPStreamToken = type { { i16, i16, i32 } } - %struct._VMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [528 x i8], { void (i8*, i8*, i32, i8*)*, float (float)*, float (float)*, float (float)*, i32 (float)* } } + %struct._VMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [528 x i8], { ptr, ptr, ptr, ptr, ptr } } -define i32 @foo(%struct.State* %dst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._VMConstants* %cnstn, %struct.PPStreamToken* %pstrm, %struct.PluginBufferData* %gpctx, %struct.VMTextures* %txtrs, %struct.VMGPStack* %gpstk, <4 x float>* %src) nounwind { +define i32 @foo(ptr %dst, ptr %prgrm, ptr %buffs, ptr %cnstn, ptr %pstrm, ptr %gpctx, ptr %txtrs, ptr %gpstk, ptr %src) nounwind { bb266.i: - getelementptr <4 x float>, <4 x float>* %src, i32 11 ; <<4 x float>*>:0 [#uses=1] - load <4 x float>, <4 x float>* %0, align 16 ; <<4 x float>>:1 [#uses=1] + getelementptr <4 x float>, ptr %src, i32 11 ; :0 [#uses=1] + load <4 x float>, ptr %0, align 16 ; <<4 x float>>:1 [#uses=1] shufflevector <4 x float> %1, <4 x float> poison, <4 x i32> < i32 0, i32 1, i32 1, i32 1 > ; <<4 x float>>:2 [#uses=1] shufflevector <4 x float> %2, <4 x float> poison, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:3 [#uses=1] shufflevector <4 x float> undef, <4 x float> poison, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:4 [#uses=1] diff --git a/llvm/test/Transforms/InstCombine/X86/shufflemask-undef.ll b/llvm/test/Transforms/InstCombine/X86/shufflemask-undef.ll index 09f102c73413a..70bf627b80264 100644 --- a/llvm/test/Transforms/InstCombine/X86/shufflemask-undef.ll +++ b/llvm/test/Transforms/InstCombine/X86/shufflemask-undef.ll @@ -10,13 +10,13 @@ target triple = "i386-apple-darwin9" %struct.ClearColor = type { double, %struct.IColor4, %struct.IColor4, float, i32 } %struct.ClipPlane = type { i32, [6 x %struct.IColor4] } %struct.ColorBuffer = type { i16, i8, i8, [8 x i16], [0 x i32] } - %struct.ColorMatrix = type { [16 x float]*, %struct.ImagingColorScale } - %struct.Convolution = type { %struct.IColor4, %struct.ImagingColorScale, i16, i16, [0 x i32], float*, i32, i32 } + %struct.ColorMatrix = type { ptr, %struct.ImagingColorScale } + %struct.Convolution = type { %struct.IColor4, 
%struct.ImagingColorScale, i16, i16, [0 x i32], ptr, i32, i32 } %struct.DepthTest = type { i16, i16, i8, i8, i8, i8, double, double } - %struct.FixedFunction = type { %struct.PPStreamToken* } + %struct.FixedFunction = type { ptr } %struct.FogMode = type { %struct.IColor4, float, float, float, float, float, i16, i16, i16, i8, i8 } %struct.HintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 } - %struct.Histogram = type { %struct.ProgramLimits*, i32, i16, i8, i8 } + %struct.Histogram = type { ptr, i32, i16, i8, i8 } %struct.ImagingColorScale = type { %struct.TCoord2, %struct.TCoord2, %struct.TCoord2, %struct.TCoord2 } %struct.ImagingSubset = type { %struct.Convolution, %struct.Convolution, %struct.Convolution, %struct.ColorMatrix, %struct.Minmax, %struct.Histogram, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, i32, [0 x i32] } %struct.Light = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.PointLineLimits, float, float, float, float, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, float, float, float, float } @@ -26,12 +26,12 @@ target triple = "i386-apple-darwin9" %struct.LogicOp = type { i16, i8, i8 } %struct.MaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 } %struct.Material = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, float, float, float, float, [8 x %struct.LightProduct], %struct.IColor4, [8 x i32] } - %struct.Minmax = type { %struct.MinmaxTable*, i16, i8, i8, [0 x i32] } + %struct.Minmax = type { ptr, i16, i8, i8, [0 x i32] } %struct.MinmaxTable = type { %struct.IColor4, %struct.IColor4 } - %struct.Mipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, float*, i8*, i16, i16, i16, i16, [2 x float] } + %struct.Mipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, ptr, ptr, i16, i16, i16, i16, [2 x float] } %struct.Multisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 } - %struct.PipelineProgramState = type { i8, i8, i8, i8, [0 x i32], %struct.IColor4* } - %struct.PixelMap = type { i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } + %struct.PipelineProgramState = type { i8, i8, i8, i8, [0 x i32], ptr } + %struct.PixelMap = type { ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %struct.PixelMode = type { float, float, %struct.PixelStore, %struct.PixelTransfer, %struct.PixelMap, %struct.ImagingSubset, i32, i32 } %struct.PixelPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 } %struct.PixelStore = type { %struct.PixelPack, %struct.PixelPack } @@ -46,32 +46,32 @@ target triple = "i386-apple-darwin9" %struct.RegisterCombinersPerPortionState = type { [4 x %struct.RegisterCombinersPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 } %struct.RegisterCombinersPerStageState = type { [2 x %struct.RegisterCombinersPerPortionState], [2 x %struct.IColor4] } %struct.RegisterCombinersPerVariableState = type { i16, i16, i16, i16 } - %struct.SWRSurfaceRec = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i8*, [4 x i8*], i32 } + %struct.SWRSurfaceRec = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, ptr, ptr, ptr, [4 x ptr], i32 } %struct.ScissorTest = type { %struct.ProgramLimits, i8, i8, i8, i8 } - %struct.State = type <{ i16, i16, i16, i16, i32, i32, [256 x 
%struct.IColor4], [128 x %struct.IColor4], %struct.Viewport, %struct.Transform, %struct.LightModel, %struct.ActiveTextureTargets, %struct.AlphaTest, %struct.BlendMode, %struct.ClearColor, %struct.ColorBuffer, %struct.DepthTest, %struct.ArrayRange, %struct.FogMode, %struct.HintMode, %struct.LineMode, %struct.LogicOp, %struct.MaskMode, %struct.PixelMode, %struct.PointMode, %struct.PolygonMode, %struct.ScissorTest, i32, %struct.StencilTest, [8 x %struct.TextureMode], [16 x %struct.TextureImageMode], %struct.ArrayRange, [8 x %struct.TextureCoordGen], %struct.ClipPlane, %struct.Multisample, %struct.RegisterCombiners, %struct.ArrayRange, %struct.ArrayRange, [3 x %struct.PipelineProgramState], %struct.ArrayRange, %struct.TransformFeedback, i32*, %struct.FixedFunction, [3 x i32], [3 x i32] }> + %struct.State = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.IColor4], [128 x %struct.IColor4], %struct.Viewport, %struct.Transform, %struct.LightModel, %struct.ActiveTextureTargets, %struct.AlphaTest, %struct.BlendMode, %struct.ClearColor, %struct.ColorBuffer, %struct.DepthTest, %struct.ArrayRange, %struct.FogMode, %struct.HintMode, %struct.LineMode, %struct.LogicOp, %struct.MaskMode, %struct.PixelMode, %struct.PointMode, %struct.PolygonMode, %struct.ScissorTest, i32, %struct.StencilTest, [8 x %struct.TextureMode], [16 x %struct.TextureImageMode], %struct.ArrayRange, [8 x %struct.TextureCoordGen], %struct.ClipPlane, %struct.Multisample, %struct.RegisterCombiners, %struct.ArrayRange, %struct.ArrayRange, [3 x %struct.PipelineProgramState], %struct.ArrayRange, %struct.TransformFeedback, ptr, %struct.FixedFunction, [3 x i32], [3 x i32] }> %struct.StencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] } %struct.TextureCoordGen = type { { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, i8, i8, i8, i8 } %struct.TextureGeomState = type { i16, i16, i16, i16, i16, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [6 x i16], [6 x i16] } %struct.TextureImageMode = type { float } - %struct.TextureLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, i8* } + %struct.TextureLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, ptr } %struct.TextureMode = type { %struct.IColor4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float } - %struct.TextureParamState = type { i16, i16, i16, i16, i16, i16, %struct.IColor4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, i8* } - %struct.TextureRec = type { [4 x float], %struct.TextureState*, %struct.Mipmaplevel*, %struct.Mipmaplevel*, float, float, float, float, i8, i8, i8, i8, i16, i16, i16, i16, i32, float, [2 x %struct.PPStreamToken] } - %struct.TextureState = type { i16, i8, i8, i16, i16, float, i32, %struct.SWRSurfaceRec*, %struct.TextureParamState, %struct.TextureGeomState, [0 x i32], i8*, i32, %struct.TextureLevel, [1 x [15 x %struct.TextureLevel]] } + %struct.TextureParamState = type { i16, i16, i16, i16, i16, i16, %struct.IColor4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, ptr } + %struct.TextureRec = type { [4 x float], ptr, ptr, ptr, float, float, float, float, i8, i8, i8, i8, i16, i16, i16, i16, i32, float, [2 x %struct.PPStreamToken] } 
+ %struct.TextureState = type { i16, i8, i8, i16, i16, float, i32, ptr, %struct.TextureParamState, %struct.TextureGeomState, [0 x i32], ptr, i32, %struct.TextureLevel, [1 x [15 x %struct.TextureLevel]] } %struct.Transform = type <{ [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 }> %struct.TransformFeedback = type { i8, i8, i8, i8, [0 x i32], [16 x i32], [16 x i32] } %struct.Viewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float } %struct.IColor4 = type { float, float, float, float } %struct.TCoord2 = type { float, float } - %struct.VMGPStack = type { [6 x <4 x float>*], <4 x float>*, i32, i32, <4 x float>*, <4 x float>**, i32, i32, i32, i32, i32, i32 } - %struct.VMTextures = type { [16 x %struct.TextureRec*] } + %struct.VMGPStack = type { [6 x ptr], ptr, i32, i32, ptr, ptr, i32, i32, i32, i32, i32, i32 } + %struct.VMTextures = type { [16 x ptr] } %struct.PPStreamToken = type { { i16, i16, i32 } } - %struct._VMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [528 x i8], { void (i8*, i8*, i32, i8*)*, float (float)*, float (float)*, float (float)*, i32 (float)* } } + %struct._VMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [528 x i8], { ptr, ptr, ptr, ptr, ptr } } -define i32 @foo(%struct.State* %dst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._VMConstants* %cnstn, %struct.PPStreamToken* %pstrm, %struct.PluginBufferData* %gpctx, %struct.VMTextures* %txtrs, %struct.VMGPStack* %gpstk, <4 x float>* %src) nounwind { +define i32 @foo(ptr %dst, ptr %prgrm, ptr %buffs, ptr %cnstn, ptr %pstrm, ptr %gpctx, ptr %txtrs, ptr %gpstk, ptr %src) nounwind { bb266.i: - getelementptr <4 x float>, <4 x float>* %src, i32 11 ; <<4 x float>*>:0 [#uses=1] - load <4 x float>, <4 x float>* %0, align 16 ; <<4 x float>>:1 [#uses=1] + getelementptr <4 x float>, ptr %src, i32 11 ; :0 [#uses=1] + load <4 x float>, ptr %0, align 16 ; <<4 x float>>:1 [#uses=1] shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 1, i32 1 > ; <<4 x float>>:2 [#uses=1] shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:3 [#uses=1] shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x float>>:4 [#uses=1] diff --git a/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll b/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll index 276c69660c5cd..07465a041e2b2 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-addsub-inseltpoison.ll @@ -140,16 +140,16 @@ define float @elts_addsub_v8f32_sub(<8 x float> %0, <8 x float> %1) { ret float %8 } -define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, float* %5) { +define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, ptr %5) { ; CHECK-LABEL: @PR46277( ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x 
float> poison, float [[TMP0:%.*]], i64 0 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP1:%.*]], i64 1 ; CHECK-NEXT: [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> [[TMP8]], <4 x float> [[TMP4:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP5:%.*]], i64 1 -; CHECK-NEXT: store float [[TMP10]], float* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP5:%.*]], i64 1 +; CHECK-NEXT: store float [[TMP10]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i64 1 -; CHECK-NEXT: store float [[TMP12]], float* [[TMP11]], align 4 +; CHECK-NEXT: store float [[TMP12]], ptr [[TMP11]], align 4 ; CHECK-NEXT: ret void ; %7 = insertelement <4 x float> poison, float %0, i32 0 @@ -158,10 +158,10 @@ define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, flo %10 = insertelement <4 x float> %9, float %3, i32 3 %11 = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %10, <4 x float> %4) %12 = extractelement <4 x float> %11, i32 0 - %13 = getelementptr inbounds float, float* %5, i64 1 - store float %12, float* %5, align 4 + %13 = getelementptr inbounds float, ptr %5, i64 1 + store float %12, ptr %5, align 4 %14 = extractelement <4 x float> %11, i32 1 - store float %14, float* %13, align 4 + store float %14, ptr %13, align 4 ret void } diff --git a/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll b/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll index 8dda1de57134d..c54f3f27d4a4e 100644 --- a/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-addsub.ll @@ -140,16 +140,16 @@ define float @elts_addsub_v8f32_sub(<8 x float> %0, <8 x float> %1) { ret float %8 } -define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, float* %5) { +define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, ptr %5) { ; CHECK-LABEL: @PR46277( ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> undef, float [[TMP0:%.*]], i64 0 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP1:%.*]], i64 1 ; CHECK-NEXT: [[TMP9:%.*]] = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> [[TMP8]], <4 x float> [[TMP4:%.*]]) ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[TMP9]], i64 0 -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, float* [[TMP5:%.*]], i64 1 -; CHECK-NEXT: store float [[TMP10]], float* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[TMP5:%.*]], i64 1 +; CHECK-NEXT: store float [[TMP10]], ptr [[TMP5]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[TMP9]], i64 1 -; CHECK-NEXT: store float [[TMP12]], float* [[TMP11]], align 4 +; CHECK-NEXT: store float [[TMP12]], ptr [[TMP11]], align 4 ; CHECK-NEXT: ret void ; %7 = insertelement <4 x float> undef, float %0, i32 0 @@ -158,10 +158,10 @@ define void @PR46277(float %0, float %1, float %2, float %3, <4 x float> %4, flo %10 = insertelement <4 x float> %9, float %3, i32 3 %11 = tail call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %10, <4 x float> %4) %12 = extractelement <4 x float> %11, i32 0 - %13 = getelementptr inbounds float, float* %5, i64 1 - store float %12, float* %5, align 4 + %13 = getelementptr inbounds float, ptr %5, i64 1 + store float %12, ptr %5, align 4 %14 = extractelement <4 x float> %11, i32 1 - store float 
+  store float %14, ptr %13, align 4
   ret void
 }
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-amx-load-store.ll b/llvm/test/Transforms/InstCombine/X86/x86-amx-load-store.ll
index d17f337679527..ae239b62bf225 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-amx-load-store.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-amx-load-store.ll
@@ -2,36 +2,36 @@
 ; RUN: opt -passes=instcombine -S < %s | FileCheck %s

 ; Prohibit pointer cast for amx.
-define dso_local void @test_amx_load_store(<256 x i32>* %src, i8* %dst) {
+define dso_local void @test_amx_load_store(ptr %src, ptr %dst) {
 ; CHECK-LABEL: @test_amx_load_store(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VEC:%.*]] = load <256 x i32>, <256 x i32>* [[SRC:%.*]], align 64
+; CHECK-NEXT:    [[VEC:%.*]] = load <256 x i32>, ptr [[SRC:%.*]], align 64
 ; CHECK-NEXT:    [[BC:%.*]] = bitcast <256 x i32> [[VEC]] to x86_amx
-; CHECK-NEXT:    tail call void @llvm.x86.tilestored64.internal(i16 16, i16 16, i8* [[DST:%.*]], i64 64, x86_amx [[BC]])
+; CHECK-NEXT:    tail call void @llvm.x86.tilestored64.internal(i16 16, i16 16, ptr [[DST:%.*]], i64 64, x86_amx [[BC]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %vec = load <256 x i32>, <256 x i32>* %src, align 64
+  %vec = load <256 x i32>, ptr %src, align 64
   %bc = bitcast <256 x i32> %vec to x86_amx
-  tail call void @llvm.x86.tilestored64.internal(i16 16, i16 16, i8* %dst, i64 64, x86_amx %bc)
+  tail call void @llvm.x86.tilestored64.internal(i16 16, i16 16, ptr %dst, i64 64, x86_amx %bc)
   ret void
 }

 ; Prohibit pointer cast for amx.
-define dso_local void @test_amx_load_store2(<256 x i32>* %dst, i8* %src) {
+define dso_local void @test_amx_load_store2(ptr %dst, ptr %src) {
 ; CHECK-LABEL: @test_amx_load_store2(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[AMX:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 16, i8* [[SRC:%.*]], i64 64)
+; CHECK-NEXT:    [[AMX:%.*]] = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 16, ptr [[SRC:%.*]], i64 64)
 ; CHECK-NEXT:    [[BC:%.*]] = bitcast x86_amx [[AMX]] to <256 x i32>
-; CHECK-NEXT:    store <256 x i32> [[BC]], <256 x i32>* [[DST:%.*]], align 1024
+; CHECK-NEXT:    store <256 x i32> [[BC]], ptr [[DST:%.*]], align 1024
 ; CHECK-NEXT:    ret void
 ;
 entry:
-  %amx = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 16, i8* %src, i64 64)
+  %amx = tail call x86_amx @llvm.x86.tileloadd64.internal(i16 16, i16 16, ptr %src, i64 64)
   %bc = bitcast x86_amx %amx to <256 x i32>
-  store <256 x i32> %bc, <256 x i32>* %dst
+  store <256 x i32> %bc, ptr %dst
   ret void
 }

-declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64)
-declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
+declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, ptr, i64)
+declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx)
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-amx.ll b/llvm/test/Transforms/InstCombine/X86/x86-amx.ll
index 84b5a73a795d8..a4c6770b4cb7b 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-amx.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-amx.ll
@@ -1,24 +1,24 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=instcombine -S < %s | FileCheck %s

-define void @foo(<256 x i32>* %arrayidx16, <256 x i32>* %arrayidx29, <256 x i32>* %arrayidx35, i1 %c1, i1 %c2) {
+define void @foo(ptr %arrayidx16, ptr %arrayidx29, ptr %arrayidx35, i1 %c1, i1 %c2) {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_COND9:%.*]]
 ; CHECK:       for.cond9:
 ; CHECK-NEXT:    br i1 [[C1:%.*]], label [[FOR_BODY14:%.*]], label [[EXIT:%.*]]
 ; CHECK:       for.body14:
-; CHECK-NEXT:    [[T5:%.*]] = load <256 x i32>, <256 x i32>* [[ARRAYIDX16:%.*]], align 64
+; CHECK-NEXT:    [[T5:%.*]] = load <256 x i32>, ptr [[ARRAYIDX16:%.*]], align 64
 ; CHECK-NEXT:    br label [[FOR_COND18:%.*]]
 ; CHECK:       for.cond18:
 ; CHECK-NEXT:    [[SUB_C_SROA_0_0:%.*]] = phi <256 x i32> [ [[T5]], [[FOR_BODY14]] ], [ [[T12:%.*]], [[FOR_BODY24:%.*]] ]
 ; CHECK-NEXT:    br i1 [[C2:%.*]], label [[FOR_BODY24]], label [[FOR_COND_CLEANUP23:%.*]]
 ; CHECK:       for.cond.cleanup23:
-; CHECK-NEXT:    store <256 x i32> [[SUB_C_SROA_0_0]], <256 x i32>* [[ARRAYIDX16]], align 64
+; CHECK-NEXT:    store <256 x i32> [[SUB_C_SROA_0_0]], ptr [[ARRAYIDX16]], align 64
 ; CHECK-NEXT:    br label [[FOR_COND9]]
 ; CHECK:       for.body24:
-; CHECK-NEXT:    [[T6:%.*]] = load <256 x i32>, <256 x i32>* [[ARRAYIDX29:%.*]], align 64
-; CHECK-NEXT:    [[T7:%.*]] = load <256 x i32>, <256 x i32>* [[ARRAYIDX35:%.*]], align 64
+; CHECK-NEXT:    [[T6:%.*]] = load <256 x i32>, ptr [[ARRAYIDX29:%.*]], align 64
+; CHECK-NEXT:    [[T7:%.*]] = load <256 x i32>, ptr [[ARRAYIDX35:%.*]], align 64
 ; CHECK-NEXT:    [[T8:%.*]] = bitcast <256 x i32> [[SUB_C_SROA_0_0]] to x86_amx
 ; CHECK-NEXT:    [[T9:%.*]] = bitcast <256 x i32> [[T6]] to x86_amx
 ; CHECK-NEXT:    [[T10:%.*]] = bitcast <256 x i32> [[T7]] to x86_amx
@@ -34,7 +34,7 @@ for.cond9:                                        ; preds = %for.cond, %for.cond
   br i1 %c1, label %for.body14, label %exit

 for.body14:
-  %t5 = load <256 x i32>, <256 x i32>* %arrayidx16, align 64
+  %t5 = load <256 x i32>, ptr %arrayidx16, align 64
   br label %for.cond18

 for.cond18:                                       ; preds = %for.body24, %for.body14
@@ -42,12 +42,12 @@ for.cond18:                                       ; preds = %for.body24, %for.bo
   br i1 %c2, label %for.body24, label %for.cond.cleanup23

 for.cond.cleanup23:                               ; preds = %for.cond18
-  store <256 x i32> %sub_c.sroa.0.0, <256 x i32>* %arrayidx16, align 64
+  store <256 x i32> %sub_c.sroa.0.0, ptr %arrayidx16, align 64
   br label %for.cond9

 for.body24:                                       ; preds = %for.cond18
-  %t6 = load <256 x i32>, <256 x i32>* %arrayidx29, align 64
-  %t7 = load <256 x i32>, <256 x i32>* %arrayidx35, align 64
+  %t6 = load <256 x i32>, ptr %arrayidx29, align 64
+  %t7 = load <256 x i32>, ptr %arrayidx35, align 64
   %t8 = bitcast <256 x i32> %sub_c.sroa.0.0 to x86_amx
   %t9 = bitcast <256 x i32> %t6 to x86_amx
   %t10 = bitcast <256 x i32> %t7 to x86_amx
@@ -59,6 +59,6 @@ exit:
   ret void
 }

-declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, i8*, i64)
+declare x86_amx @llvm.x86.tileloadd64.internal(i16, i16, ptr, i64)
 declare x86_amx @llvm.x86.tdpbssd.internal(i16, i16, i16, x86_amx, x86_amx, x86_amx)
-declare void @llvm.x86.tilestored64.internal(i16, i16, i8*, i64, x86_amx)
+declare void @llvm.x86.tilestored64.internal(i16, i16, ptr, i64, x86_amx)
diff --git a/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll b/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll
index 1ddd18db40a7a..b092b1db68ec0 100644
--- a/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll
+++ b/llvm/test/Transforms/InstCombine/X86/x86-masked-memops.ll
@@ -5,105 +5,99 @@

 ; If the mask isn't constant, do nothing.
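As a standalone illustration of what the maskload tests in this file exercise (a hypothetical sketch, not part of the patch; the @sketch_* name is invented), InstCombine rewrites the target-specific AVX masked load into the generic masked-load intrinsic when the mask is the sign-extension of a compare, and with opaque pointers the rewrite needs no bitcast of the pointer operand. The alignment on the generic call is `i32 1` because the AVX intrinsic makes no alignment promise.

define <4 x float> @sketch_cmp_mask(ptr %f, <4 x i32> %src) {
  ; The sign bit of each mask lane comes straight from the compare, so the
  ; compare result itself can serve as the <4 x i1> mask operand.
  %icmp = icmp ne <4 x i32> %src, zeroinitializer
  %mask = sext <4 x i1> %icmp to <4 x i32>
  ; Expected after `opt -passes=instcombine -S`:
  ;   %ld = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %f, i32 1, <4 x i1> %icmp, <4 x float> zeroinitializer)
  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(ptr %f, <4 x i32> %mask)
  ret <4 x float> %ld
}

declare <4 x float> @llvm.x86.avx.maskload.ps(ptr, <4 x i32>)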
-define <4 x float> @mload(i8* %f, <4 x i32> %mask) {
+define <4 x float> @mload(ptr %f, <4 x i32> %mask) {
 ; CHECK-LABEL: @mload(
-; CHECK-NEXT:    [[LD:%.*]] = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* [[F:%.*]], <4 x i32> [[MASK:%.*]])
+; CHECK-NEXT:    [[LD:%.*]] = tail call <4 x float> @llvm.x86.avx.maskload.ps(ptr [[F:%.*]], <4 x i32> [[MASK:%.*]])
 ; CHECK-NEXT:    ret <4 x float> [[LD]]
 ;
-  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> %mask)
+  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(ptr %f, <4 x i32> %mask)
   ret <4 x float> %ld
 }

 ; If the mask comes from a comparison, convert to an LLVM intrinsic. The backend should optimize further.

-define <4 x float> @mload_v4f32_cmp(i8* %f, <4 x i32> %src) {
+define <4 x float> @mload_v4f32_cmp(ptr %f, <4 x i32> %src) {
 ; CHECK-LABEL: @mload_v4f32_cmp(
 ; CHECK-NEXT:    [[ICMP:%.*]] = icmp ne <4 x i32> [[SRC:%.*]], zeroinitializer
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[CASTVEC]], i32 1, <4 x i1> [[ICMP]], <4 x float> zeroinitializer)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[F:%.*]], i32 1, <4 x i1> [[ICMP]], <4 x float> zeroinitializer)
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
   %icmp = icmp ne <4 x i32> %src, zeroinitializer
   %mask = sext <4 x i1> %icmp to <4 x i32>
-  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> %mask)
+  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(ptr %f, <4 x i32> %mask)
   ret <4 x float> %ld
 }

 ; Zero mask returns a zero vector.

-define <4 x float> @mload_zeros(i8* %f) {
+define <4 x float> @mload_zeros(ptr %f) {
 ; CHECK-LABEL: @mload_zeros(
 ; CHECK-NEXT:    ret <4 x float> zeroinitializer
 ;
-  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> zeroinitializer)
+  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(ptr %f, <4 x i32> zeroinitializer)
   ret <4 x float> %ld
 }

 ; Only the sign bit matters.

-define <4 x float> @mload_fake_ones(i8* %f) {
+define <4 x float> @mload_fake_ones(ptr %f) {
 ; CHECK-LABEL: @mload_fake_ones(
 ; CHECK-NEXT:    ret <4 x float> zeroinitializer
 ;
-  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> )
+  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(ptr %f, <4 x i32> )
   ret <4 x float> %ld
 }

 ; All mask bits are set, so this is just a vector load.

-define <4 x float> @mload_real_ones(i8* %f) {
+define <4 x float> @mload_real_ones(ptr %f) {
 ; CHECK-LABEL: @mload_real_ones(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <4 x float>, <4 x float>* [[CASTVEC]], align 1
+; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <4 x float>, ptr [[F:%.*]], align 1
 ; CHECK-NEXT:    ret <4 x float> [[UNMASKEDLOAD]]
 ;
-  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> )
+  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(ptr %f, <4 x i32> )
   ret <4 x float> %ld
 }

 ; It's a constant mask, so convert to an LLVM intrinsic. The backend should optimize further.
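The constant-mask case works lane by lane: only the sign bit of each mask element matters, so a constant vector mask collapses to a `<4 x i1>` immediate on the generic intrinsic, with the passthru supplying zero for the lanes the hardware would leave untouched. A minimal sketch follows; the mask constant here is my own illustration (the literal constants in the tests above were lost in formatting and are not reproduced):

define <4 x float> @sketch_const_mask(ptr %f) {
  ; Illustrative constant: only lane 3 has its sign bit set, so only lane 3
  ; is loaded. The expected generic form is roughly:
  ;   call <4 x float> @llvm.masked.load.v4f32.p0(ptr %f, i32 1,
  ;       <4 x i1> <i1 false, i1 false, i1 false, i1 true>,
  ;       <4 x float> <float 0.0, float 0.0, float 0.0, float undef>)
  ; where the zero passthru lanes model the AVX semantics of zeroing
  ; masked-off elements.
  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(ptr %f, <4 x i32> <i32 0, i32 0, i32 0, i32 -1>)
  ret <4 x float> %ld
}

declare <4 x float> @llvm.x86.avx.maskload.ps(ptr, <4 x i32>)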
-define <4 x float> @mload_one_one(i8* %f) {
+define <4 x float> @mload_one_one(ptr %f) {
 ; CHECK-LABEL: @mload_one_one(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[CASTVEC]], i32 1, <4 x i1> , <4 x float> )
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[F:%.*]], i32 1, <4 x i1> , <4 x float> )
 ; CHECK-NEXT:    ret <4 x float> [[TMP1]]
 ;
-  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> )
+  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(ptr %f, <4 x i32> )
   ret <4 x float> %ld
 }

 ; Try doubles.

-define <2 x double> @mload_one_one_double(i8* %f) {
+define <2 x double> @mload_one_one_double(ptr %f) {
 ; CHECK-LABEL: @mload_one_one_double(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[CASTVEC]], i32 1, <2 x i1> , <2 x double> )
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0(ptr [[F:%.*]], i32 1, <2 x i1> , <2 x double> )
 ; CHECK-NEXT:    ret <2 x double> [[TMP1]]
 ;
-  %ld = tail call <2 x double> @llvm.x86.avx.maskload.pd(i8* %f, <2 x i64> )
+  %ld = tail call <2 x double> @llvm.x86.avx.maskload.pd(ptr %f, <2 x i64> )
   ret <2 x double> %ld
 }

 ; Try 256-bit FP ops.

-define <8 x float> @mload_v8f32(i8* %f) {
+define <8 x float> @mload_v8f32(ptr %f) {
 ; CHECK-LABEL: @mload_v8f32(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <8 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[CASTVEC]], i32 1, <8 x i1> , <8 x float> )
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[F:%.*]], i32 1, <8 x i1> , <8 x float> )
 ; CHECK-NEXT:    ret <8 x float> [[TMP1]]
 ;
-  %ld = tail call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %f, <8 x i32> )
+  %ld = tail call <8 x float> @llvm.x86.avx.maskload.ps.256(ptr %f, <8 x i32> )
   ret <8 x float> %ld
 }

-define <8 x float> @mload_v8f32_cmp(i8* %f, <8 x float> %src0, <8 x float> %src1) {
+define <8 x float> @mload_v8f32_cmp(ptr %f, <8 x float> %src0, <8 x float> %src1) {
 ; CHECK-LABEL: @mload_v8f32_cmp(
 ; CHECK-NEXT:    [[ICMP0:%.*]] = fcmp one <8 x float> [[SRC0:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[ICMP1:%.*]] = fcmp one <8 x float> [[SRC1:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[MASK1:%.*]] = and <8 x i1> [[ICMP0]], [[ICMP1]]
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <8 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[CASTVEC]], i32 1, <8 x i1> [[MASK1]], <8 x float> zeroinitializer)
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0(ptr [[F:%.*]], i32 1, <8 x i1> [[MASK1]], <8 x float> zeroinitializer)
 ; CHECK-NEXT:    ret <8 x float> [[TMP1]]
 ;
   %icmp0 = fcmp one <8 x float> %src0, zeroinitializer
@@ -111,72 +105,67 @@ define <8 x float> @mload_v8f32_cmp(i8* %f, <8 x float> %src0, <8 x float> %src1
   %ext0 = sext <8 x i1> %icmp0 to <8 x i32>
   %ext1 = sext <8 x i1> %icmp1 to <8 x i32>
   %mask = and <8 x i32> %ext0, %ext1
-  %ld = tail call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %f, <8 x i32> %mask)
+  %ld = tail call <8 x float> @llvm.x86.avx.maskload.ps.256(ptr %f, <8 x i32> %mask)
   ret <8 x float> %ld
 }

-define <4 x double> @mload_v4f64(i8* %f) {
+define <4 x double> @mload_v4f64(ptr %f) {
 ; CHECK-LABEL: @mload_v4f64(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[CASTVEC]], i32 1, <4 x i1> , <4 x double> )
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0(ptr [[F:%.*]], i32 1, <4 x i1> , <4 x double> )
 ; CHECK-NEXT:    ret <4 x double> [[TMP1]]
 ;
-  %ld = tail call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %f, <4 x i64> )
+  %ld = tail call <4 x double> @llvm.x86.avx.maskload.pd.256(ptr %f, <4 x i64> )
   ret <4 x double> %ld
 }

 ; Try the AVX2 variants.

-define <4 x i32> @mload_v4i32(i8* %f) {
+define <4 x i32> @mload_v4i32(ptr %f) {
 ; CHECK-LABEL: @mload_v4i32(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[CASTVEC]], i32 1, <4 x i1> , <4 x i32> )
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr [[F:%.*]], i32 1, <4 x i1> , <4 x i32> )
 ; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
-  %ld = tail call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %f, <4 x i32> )
+  %ld = tail call <4 x i32> @llvm.x86.avx2.maskload.d(ptr %f, <4 x i32> )
   ret <4 x i32> %ld
 }

-define <2 x i64> @mload_v2i64(i8* %f) {
+define <2 x i64> @mload_v2i64(ptr %f) {
 ; CHECK-LABEL: @mload_v2i64(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <2 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* [[CASTVEC]], i32 1, <2 x i1> , <2 x i64> )
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr [[F:%.*]], i32 1, <2 x i1> , <2 x i64> )
 ; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
 ;
-  %ld = tail call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %f, <2 x i64> )
+  %ld = tail call <2 x i64> @llvm.x86.avx2.maskload.q(ptr %f, <2 x i64> )
   ret <2 x i64> %ld
 }

-define <8 x i32> @mload_v8i32(i8* %f) {
+define <8 x i32> @mload_v8i32(ptr %f) {
 ; CHECK-LABEL: @mload_v8i32(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <8 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[CASTVEC]], i32 1, <8 x i1> , <8 x i32> )
+; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[F:%.*]], i32 1, <8 x i1> , <8 x i32> )
 ; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
 ;
-  %ld = tail call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %f, <8 x i32> )
+  %ld = tail call <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr %f, <8 x i32> )
   ret <8 x i32> %ld
 }

-define <4 x i64> @mload_v4i64(i8* %f) {
+define <4 x i64> @mload_v4i64(ptr %f) {
 ; CHECK-LABEL: @mload_v4i64(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* [[CASTVEC]], i32 1, <4 x i1> , <4 x i64> )
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr [[F:%.*]], i32 1, <4 x i1> , <4 x i64> )
 ; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
 ;
-  %ld = tail call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %f, <4 x i64> )
+  %ld = tail call <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr %f, <4 x i64> )
   ret <4 x i64> %ld
 }

-define <4 x i64> @mload_v4i64_cmp(i8* %f, <4 x i64> %src) {
+define <4 x i64> @mload_v4i64_cmp(ptr %f, <4 x i64> %src) {
 ; CHECK-LABEL: @mload_v4i64_cmp(
 ; CHECK-NEXT:    [[SRC_LOBIT:%.*]] = ashr <4 x i64> [[SRC:%.*]],
 ; CHECK-NEXT:    [[SRC_LOBIT_NOT:%.*]] = xor <4 x i64> [[SRC_LOBIT]],
-; CHECK-NEXT:    [[LD:%.*]] = tail call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* [[F:%.*]], <4 x i64> [[SRC_LOBIT_NOT]])
+; CHECK-NEXT:    [[LD:%.*]] = tail call <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr [[F:%.*]], <4 x i64> [[SRC_LOBIT_NOT]])
 ; CHECK-NEXT:    ret <4 x i64> [[LD]]
 ;
   %icmp = icmp sge <4 x i64> %src, zeroinitializer
   %mask = sext <4 x i1> %icmp to <4 x i64>
-  %ld = tail call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %f, <4 x i64> %mask)
+  %ld = tail call <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr %f, <4 x i64> %mask)
   ret <4 x i64> %ld
 }

@@ -184,172 +173,161 @@ define <4 x i64> @mload_v4i64_cmp(i8* %f, <4 x i64> %src) {

 ; If the mask isn't constant, do nothing.

-define void @mstore(i8* %f, <4 x i32> %mask, <4 x float> %v) {
+define void @mstore(ptr %f, <4 x i32> %mask, <4 x float> %v) {
 ; CHECK-LABEL: @mstore(
-; CHECK-NEXT:    tail call void @llvm.x86.avx.maskstore.ps(i8* [[F:%.*]], <4 x i32> [[MASK:%.*]], <4 x float> [[V:%.*]])
+; CHECK-NEXT:    tail call void @llvm.x86.avx.maskstore.ps(ptr [[F:%.*]], <4 x i32> [[MASK:%.*]], <4 x float> [[V:%.*]])
 ; CHECK-NEXT:    ret void
 ;
-  tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> %mask, <4 x float> %v)
+  tail call void @llvm.x86.avx.maskstore.ps(ptr %f, <4 x i32> %mask, <4 x float> %v)
   ret void
 }

 ; If the mask comes from a comparison, convert to an LLVM intrinsic. The backend should optimize further.

-define void @mstore_v4f32_cmp(i8* %f, <4 x i32> %src, <4 x float> %v) {
+define void @mstore_v4f32_cmp(ptr %f, <4 x i32> %src, <4 x float> %v) {
 ; CHECK-LABEL: @mstore_v4f32_cmp(
 ; CHECK-NEXT:    [[ICMP:%.*]] = icmp eq <4 x i32> [[SRC:%.*]], zeroinitializer
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x float>*
-; CHECK-NEXT:    call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> [[V:%.*]], <4 x float>* [[CASTVEC]], i32 1, <4 x i1> [[ICMP]])
+; CHECK-NEXT:    call void @llvm.masked.store.v4f32.p0(<4 x float> [[V:%.*]], ptr [[F:%.*]], i32 1, <4 x i1> [[ICMP]])
 ; CHECK-NEXT:    ret void
 ;
   %icmp = icmp eq <4 x i32> %src, zeroinitializer
   %mask = sext <4 x i1> %icmp to <4 x i32>
-  tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> %mask, <4 x float> %v)
+  tail call void @llvm.x86.avx.maskstore.ps(ptr %f, <4 x i32> %mask, <4 x float> %v)
   ret void
 }

 ; Zero mask is a nop.

-define void @mstore_zeros(i8* %f, <4 x float> %v) {
+define void @mstore_zeros(ptr %f, <4 x float> %v) {
 ; CHECK-LABEL: @mstore_zeros(
 ; CHECK-NEXT:    ret void
 ;
-  tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> zeroinitializer, <4 x float> %v)
+  tail call void @llvm.x86.avx.maskstore.ps(ptr %f, <4 x i32> zeroinitializer, <4 x float> %v)
   ret void
 }

 ; Only the sign bit matters.

-define void @mstore_fake_ones(i8* %f, <4 x float> %v) {
+define void @mstore_fake_ones(ptr %f, <4 x float> %v) {
 ; CHECK-LABEL: @mstore_fake_ones(
 ; CHECK-NEXT:    ret void
 ;
-  tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> , <4 x float> %v)
+  tail call void @llvm.x86.avx.maskstore.ps(ptr %f, <4 x i32> , <4 x float> %v)
   ret void
 }

 ; All mask bits are set, so this is just a vector store.

-define void @mstore_real_ones(i8* %f, <4 x float> %v) {
+define void @mstore_real_ones(ptr %f, <4 x float> %v) {
 ; CHECK-LABEL: @mstore_real_ones(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x float>*
-; CHECK-NEXT:    store <4 x float> [[V:%.*]], <4 x float>* [[CASTVEC]], align 1
+; CHECK-NEXT:    store <4 x float> [[V:%.*]], ptr [[F:%.*]], align 1
 ; CHECK-NEXT:    ret void
 ;
-  tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> , <4 x float> %v)
+  tail call void @llvm.x86.avx.maskstore.ps(ptr %f, <4 x i32> , <4 x float> %v)
   ret void
 }

 ; It's a constant mask, so convert to an LLVM intrinsic. The backend should optimize further.
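Masked stores mirror the loads: a compare-derived or constant mask becomes a generic @llvm.masked.store call, an all-zero mask deletes the store outright, and an all-sign-bits mask degrades to a plain `align 1` store. A hedged sketch of the compare-mask case (hypothetical standalone module, invented @sketch_* name):

define void @sketch_store_cmp_mask(ptr %f, <4 x i32> %src, <4 x float> %v) {
  %icmp = icmp eq <4 x i32> %src, zeroinitializer
  %mask = sext <4 x i1> %icmp to <4 x i32>
  ; Expected after `opt -passes=instcombine -S`:
  ;   call void @llvm.masked.store.v4f32.p0(<4 x float> %v, ptr %f, i32 1, <4 x i1> %icmp)
  tail call void @llvm.x86.avx.maskstore.ps(ptr %f, <4 x i32> %mask, <4 x float> %v)
  ret void
}

declare void @llvm.x86.avx.maskstore.ps(ptr, <4 x i32>, <4 x float>)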
-define void @mstore_one_one(i8* %f, <4 x float> %v) {
+define void @mstore_one_one(ptr %f, <4 x float> %v) {
 ; CHECK-LABEL: @mstore_one_one(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x float>*
-; CHECK-NEXT:    call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> [[V:%.*]], <4 x float>* [[CASTVEC]], i32 1, <4 x i1> )
+; CHECK-NEXT:    call void @llvm.masked.store.v4f32.p0(<4 x float> [[V:%.*]], ptr [[F:%.*]], i32 1, <4 x i1> )
 ; CHECK-NEXT:    ret void
 ;
-  tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> , <4 x float> %v)
+  tail call void @llvm.x86.avx.maskstore.ps(ptr %f, <4 x i32> , <4 x float> %v)
   ret void
 }

 ; Try doubles.

-define void @mstore_one_one_double(i8* %f, <2 x double> %v) {
+define void @mstore_one_one_double(ptr %f, <2 x double> %v) {
 ; CHECK-LABEL: @mstore_one_one_double(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <2 x double>*
-; CHECK-NEXT:    call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> [[V:%.*]], <2 x double>* [[CASTVEC]], i32 1, <2 x i1> )
+; CHECK-NEXT:    call void @llvm.masked.store.v2f64.p0(<2 x double> [[V:%.*]], ptr [[F:%.*]], i32 1, <2 x i1> )
 ; CHECK-NEXT:    ret void
 ;
-  tail call void @llvm.x86.avx.maskstore.pd(i8* %f, <2 x i64> , <2 x double> %v)
+  tail call void @llvm.x86.avx.maskstore.pd(ptr %f, <2 x i64> , <2 x double> %v)
   ret void
 }

 ; Try 256-bit FP ops.

-define void @mstore_v8f32(i8* %f, <8 x float> %v) {
+define void @mstore_v8f32(ptr %f, <8 x float> %v) {
 ; CHECK-LABEL: @mstore_v8f32(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <8 x float>*
-; CHECK-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> [[V:%.*]], <8 x float>* [[CASTVEC]], i32 1, <8 x i1> )
+; CHECK-NEXT:    call void @llvm.masked.store.v8f32.p0(<8 x float> [[V:%.*]], ptr [[F:%.*]], i32 1, <8 x i1> )
 ; CHECK-NEXT:    ret void
 ;
-  tail call void @llvm.x86.avx.maskstore.ps.256(i8* %f, <8 x i32> , <8 x float> %v)
+  tail call void @llvm.x86.avx.maskstore.ps.256(ptr %f, <8 x i32> , <8 x float> %v)
   ret void
 }

-define void @mstore_v4f64(i8* %f, <4 x double> %v) {
+define void @mstore_v4f64(ptr %f, <4 x double> %v) {
 ; CHECK-LABEL: @mstore_v4f64(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x double>*
-; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> [[V:%.*]], <4 x double>* [[CASTVEC]], i32 1, <4 x i1> )
+; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0(<4 x double> [[V:%.*]], ptr [[F:%.*]], i32 1, <4 x i1> )
 ; CHECK-NEXT:    ret void
 ;
-  tail call void @llvm.x86.avx.maskstore.pd.256(i8* %f, <4 x i64> , <4 x double> %v)
+  tail call void @llvm.x86.avx.maskstore.pd.256(ptr %f, <4 x i64> , <4 x double> %v)
   ret void
 }

-define void @mstore_v4f64_cmp(i8* %f, <4 x i32> %src, <4 x double> %v) {
+define void @mstore_v4f64_cmp(ptr %f, <4 x i32> %src, <4 x double> %v) {
 ; CHECK-LABEL: @mstore_v4f64_cmp(
 ; CHECK-NEXT:    [[SRC_LOBIT:%.*]] = ashr <4 x i32> [[SRC:%.*]],
 ; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i32> [[SRC_LOBIT]],
 ; CHECK-NEXT:    [[DOTNOT:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64>
-; CHECK-NEXT:    tail call void @llvm.x86.avx.maskstore.pd.256(i8* [[F:%.*]], <4 x i64> [[DOTNOT]], <4 x double> [[V:%.*]])
+; CHECK-NEXT:    tail call void @llvm.x86.avx.maskstore.pd.256(ptr [[F:%.*]], <4 x i64> [[DOTNOT]], <4 x double> [[V:%.*]])
 ; CHECK-NEXT:    ret void
 ;
   %icmp = icmp sge <4 x i32> %src, zeroinitializer
   %mask = sext <4 x i1> %icmp to <4 x i64>
-  tail call void @llvm.x86.avx.maskstore.pd.256(i8* %f, <4 x i64> %mask, <4 x double> %v)
+  tail call void @llvm.x86.avx.maskstore.pd.256(ptr %f, <4 x i64> %mask, <4 x double> %v)
   ret void
 }

 ; Try the AVX2 variants.

-define void @mstore_v4i32(i8* %f, <4 x i32> %v) {
+define void @mstore_v4i32(ptr %f, <4 x i32> %v) {
 ; CHECK-LABEL: @mstore_v4i32(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x i32>*
-; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> [[V:%.*]], <4 x i32>* [[CASTVEC]], i32 1, <4 x i1> )
+; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0(<4 x i32> [[V:%.*]], ptr [[F:%.*]], i32 1, <4 x i1> )
 ; CHECK-NEXT:    ret void
 ;
-  tail call void @llvm.x86.avx2.maskstore.d(i8* %f, <4 x i32> , <4 x i32> %v)
+  tail call void @llvm.x86.avx2.maskstore.d(ptr %f, <4 x i32> , <4 x i32> %v)
   ret void
 }

-define void @mstore_v2i64(i8* %f, <2 x i64> %v) {
+define void @mstore_v2i64(ptr %f, <2 x i64> %v) {
 ; CHECK-LABEL: @mstore_v2i64(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <2 x i64>*
-; CHECK-NEXT:    call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> [[V:%.*]], <2 x i64>* [[CASTVEC]], i32 1, <2 x i1> )
+; CHECK-NEXT:    call void @llvm.masked.store.v2i64.p0(<2 x i64> [[V:%.*]], ptr [[F:%.*]], i32 1, <2 x i1> )
 ; CHECK-NEXT:    ret void
 ;
-  tail call void @llvm.x86.avx2.maskstore.q(i8* %f, <2 x i64> , <2 x i64> %v)
+  tail call void @llvm.x86.avx2.maskstore.q(ptr %f, <2 x i64> , <2 x i64> %v)
   ret void
 }

-define void @mstore_v8i32(i8* %f, <8 x i32> %v) {
+define void @mstore_v8i32(ptr %f, <8 x i32> %v) {
 ; CHECK-LABEL: @mstore_v8i32(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <8 x i32>*
-; CHECK-NEXT:    call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> [[V:%.*]], <8 x i32>* [[CASTVEC]], i32 1, <8 x i1> )
+; CHECK-NEXT:    call void @llvm.masked.store.v8i32.p0(<8 x i32> [[V:%.*]], ptr [[F:%.*]], i32 1, <8 x i1> )
 ; CHECK-NEXT:    ret void
 ;
-  tail call void @llvm.x86.avx2.maskstore.d.256(i8* %f, <8 x i32> , <8 x i32> %v)
+  tail call void @llvm.x86.avx2.maskstore.d.256(ptr %f, <8 x i32> , <8 x i32> %v)
   ret void
 }

-define void @mstore_v4i64(i8* %f, <4 x i64> %v) {
+define void @mstore_v4i64(ptr %f, <4 x i64> %v) {
 ; CHECK-LABEL: @mstore_v4i64(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x i64>*
-; CHECK-NEXT:    call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> [[V:%.*]], <4 x i64>* [[CASTVEC]], i32 1, <4 x i1> )
+; CHECK-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> [[V:%.*]], ptr [[F:%.*]], i32 1, <4 x i1> )
 ; CHECK-NEXT:    ret void
 ;
-  tail call void @llvm.x86.avx2.maskstore.q.256(i8* %f, <4 x i64> , <4 x i64> %v)
+  tail call void @llvm.x86.avx2.maskstore.q.256(ptr %f, <4 x i64> , <4 x i64> %v)
   ret void
 }

-define void @mstore_v4i64_cmp(i8* %f, <4 x i64> %src0, <4 x i64> %src1, <4 x i64> %v) {
+define void @mstore_v4i64_cmp(ptr %f, <4 x i64> %src0, <4 x i64> %src1, <4 x i64> %v) {
 ; CHECK-LABEL: @mstore_v4i64_cmp(
 ; CHECK-NEXT:    [[ICMP0:%.*]] = icmp eq <4 x i64> [[SRC0:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[ICMP1:%.*]] = icmp ne <4 x i64> [[SRC1:%.*]], zeroinitializer
 ; CHECK-NEXT:    [[MASK1:%.*]] = and <4 x i1> [[ICMP0]], [[ICMP1]]
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* [[F:%.*]] to <4 x i64>*
-; CHECK-NEXT:    call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> [[V:%.*]], <4 x i64>* [[CASTVEC]], i32 1, <4 x i1> [[MASK1]])
+; CHECK-NEXT:    call void @llvm.masked.store.v4i64.p0(<4 x i64> [[V:%.*]], ptr [[F:%.*]], i32 1, <4 x i1> [[MASK1]])
 ; CHECK-NEXT:    ret void
 ;
   %icmp0 = icmp eq <4 x i64> %src0, zeroinitializer
@@ -357,38 +335,38 @@ define void @mstore_v4i64_cmp(i8* %f, <4 x i64> %src0, <4 x i64> %src1, <4 x i64
   %ext0 = sext <4 x i1> %icmp0 to <4 x i64>
   %ext1 = sext <4 x i1> %icmp1 to <4 x i64>
   %mask = and <4 x i64> %ext0, %ext1
-  tail call void @llvm.x86.avx2.maskstore.q.256(i8* %f, <4 x i64> %mask, <4 x i64> %v)
+  tail call void @llvm.x86.avx2.maskstore.q.256(ptr %f, <4 x i64> %mask, <4 x i64> %v)
   ret void
 }

 ; The original SSE2 masked store variant.

-define void @mstore_v16i8_sse2_zeros(<16 x i8> %d, i8* %p) {
+define void @mstore_v16i8_sse2_zeros(<16 x i8> %d, ptr %p) {
 ; CHECK-LABEL: @mstore_v16i8_sse2_zeros(
 ; CHECK-NEXT:    ret void
 ;
-  tail call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %d, <16 x i8> zeroinitializer, i8* %p)
+  tail call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %d, <16 x i8> zeroinitializer, ptr %p)
   ret void
 }

-declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>)
-declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>)
-declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>)
-declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>)
+declare <4 x float> @llvm.x86.avx.maskload.ps(ptr, <4 x i32>)
+declare <2 x double> @llvm.x86.avx.maskload.pd(ptr, <2 x i64>)
+declare <8 x float> @llvm.x86.avx.maskload.ps.256(ptr, <8 x i32>)
+declare <4 x double> @llvm.x86.avx.maskload.pd.256(ptr, <4 x i64>)

-declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>)
-declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>)
-declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>)
-declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>)
+declare <4 x i32> @llvm.x86.avx2.maskload.d(ptr, <4 x i32>)
+declare <2 x i64> @llvm.x86.avx2.maskload.q(ptr, <2 x i64>)
+declare <8 x i32> @llvm.x86.avx2.maskload.d.256(ptr, <8 x i32>)
+declare <4 x i64> @llvm.x86.avx2.maskload.q.256(ptr, <4 x i64>)

-declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>)
-declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>)
-declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>)
-declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>)
+declare void @llvm.x86.avx.maskstore.ps(ptr, <4 x i32>, <4 x float>)
+declare void @llvm.x86.avx.maskstore.pd(ptr, <2 x i64>, <2 x double>)
+declare void @llvm.x86.avx.maskstore.ps.256(ptr, <8 x i32>, <8 x float>)
+declare void @llvm.x86.avx.maskstore.pd.256(ptr, <4 x i64>, <4 x double>)

-declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>)
-declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>)
-declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>)
-declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>)
+declare void @llvm.x86.avx2.maskstore.d(ptr, <4 x i32>, <4 x i32>)
+declare void @llvm.x86.avx2.maskstore.q(ptr, <2 x i64>, <2 x i64>)
+declare void @llvm.x86.avx2.maskstore.d.256(ptr, <8 x i32>, <8 x i32>)
+declare void @llvm.x86.avx2.maskstore.q.256(ptr, <4 x i64>, <4 x i64>)

-declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*)
+declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, ptr)
diff --git a/llvm/test/Transforms/InstCombine/assume_inevitable.ll b/llvm/test/Transforms/InstCombine/assume_inevitable.ll
index 5cb137d507c68..76a9a49adeda9 100644
--- a/llvm/test/Transforms/InstCombine/assume_inevitable.ll
+++ b/llvm/test/Transforms/InstCombine/assume_inevitable.ll
@@ -4,22 +4,20 @@
 ; Check that assume is propagated backwards through all
 ; operations that are `isGuaranteedToTransferExecutionToSuccessor`
 ; (it should reach the load and mark it as `align 32`).
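The idiom this test relies on asserts pointer alignment through @llvm.assume: clear low address bits imply alignment, and because every instruction between the earlier load and the assume is guaranteed to transfer execution to its successor, InstCombine may propagate the alignment fact backwards and annotate that load with `align 32`. A minimal sketch of the pattern (hypothetical module; the @sketch_* name is invented):

define i32 @sketch_assume_align(ptr %a) {
  ; InstCombine is expected to strengthen this to `load i32, ptr %a, align 32`
  ; because the assume below proves the low 5 address bits are zero and
  ; nothing in between can diverge or throw.
  %v = load i32, ptr %a, align 4
  %ptrint = ptrtoint ptr %a to i64
  %maskedptr = and i64 %ptrint, 31
  %maskcond = icmp eq i64 %maskedptr, 0
  tail call void @llvm.assume(i1 %maskcond)
  ret i32 %v
}

declare void @llvm.assume(i1)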
-define i32 @assume_inevitable(i32* %a, i32* %b, i8* %c) {
+define i32 @assume_inevitable(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @assume_inevitable(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[M:%.*]] = alloca i64, align 8
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 32
-; CHECK-NEXT: [[LOADRES:%.*]] = load i32, i32* [[B:%.*]], align 4
-; CHECK-NEXT: [[LOADRES2:%.*]] = call i32 @llvm.annotation.i32(i32 [[LOADRES]], i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0), i32 2)
-; CHECK-NEXT: store i32 [[LOADRES2]], i32* [[A]], align 32
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A:%.*]], align 32
+; CHECK-NEXT: [[LOADRES:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT: [[LOADRES2:%.*]] = call i32 @llvm.annotation.i32(i32 [[LOADRES]], ptr nonnull @.str, ptr nonnull @.str1, i32 2)
+; CHECK-NEXT: store i32 [[LOADRES2]], ptr [[A]], align 32
 ; CHECK-NEXT: [[DUMMY_EQ:%.*]] = icmp ugt i32 [[LOADRES]], 42
 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[DUMMY_EQ]])
-; CHECK-NEXT: [[M_I8:%.*]] = bitcast i64* [[M]] to i8*
-; CHECK-NEXT: [[M_A:%.*]] = call i8* @llvm.ptr.annotation.p0i8(i8* nonnull [[M_I8]], i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i64 0, i64 0), i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i64 0, i64 0), i32 2, i8* null)
-; CHECK-NEXT: [[M_X:%.*]] = bitcast i8* [[M_A]] to i64*
-; CHECK-NEXT: [[OBJSZ:%.*]] = call i64 @llvm.objectsize.i64.p0i8(i8* [[C:%.*]], i1 false, i1 false, i1 false)
-; CHECK-NEXT: store i64 [[OBJSZ]], i64* [[M_X]], align 4
-; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64
+; CHECK-NEXT: [[M_A:%.*]] = call ptr @llvm.ptr.annotation.p0(ptr nonnull [[M]], ptr nonnull @.str, ptr nonnull @.str1, i32 2, ptr null)
+; CHECK-NEXT: [[OBJSZ:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[C:%.*]], i1 false, i1 false, i1 false)
+; CHECK-NEXT: store i64 [[OBJSZ]], ptr [[M_A]], align 4
+; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint ptr [[A]] to i64
 ; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31
 ; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0
 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
@@ -28,30 +26,28 @@ define i32 @assume_inevitable(i32* %a, i32* %b, i8* %c) {
 entry:
   %dummy = alloca i8, align 4
   %m = alloca i64
-  %0 = load i32, i32* %a, align 4
+  %0 = load i32, ptr %a, align 4

   ; START perform a bunch of inevitable operations
-  %loadres = load i32, i32* %b
-  %loadres2 = call i32 @llvm.annotation.i32(i32 %loadres, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i32 0, i32 0), i32 2)
-  store i32 %loadres2, i32* %a
+  %loadres = load i32, ptr %b
+  %loadres2 = call i32 @llvm.annotation.i32(i32 %loadres, ptr @.str, ptr @.str1, i32 2)
+  store i32 %loadres2, ptr %a
   %dummy_eq = icmp ugt i32 %loadres, 42
   tail call void @llvm.assume(i1 %dummy_eq)

-  call void @llvm.lifetime.start.p0i8(i64 1, i8* %dummy)
-  %i = call {}* @llvm.invariant.start.p0i8(i64 1, i8* %dummy)
-  call void @llvm.invariant.end.p0i8({}* %i, i64 1, i8* %dummy)
-  call void @llvm.lifetime.end.p0i8(i64 1, i8* %dummy)
+  call void @llvm.lifetime.start.p0(i64 1, ptr %dummy)
+  %i = call ptr @llvm.invariant.start.p0(i64 1, ptr %dummy)
+  call void @llvm.invariant.end.p0(ptr %i, i64 1, ptr %dummy)
+  call void @llvm.lifetime.end.p0(i64 1, ptr %dummy)

-  %m_i8 = bitcast i64* %m to i8*
-  %m_a = call i8* @llvm.ptr.annotation.p0i8(i8* %m_i8, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str1, i32 0, i32 0), i32 2, i8* null)
-  %m_x = bitcast i8* %m_a to i64*
-  %objsz = call i64 @llvm.objectsize.i64.p0i8(i8* %c, i1 false)
-  store i64 %objsz, i64* %m_x
+  %m_a = call ptr @llvm.ptr.annotation.p0(ptr %m, ptr @.str, ptr @.str1, i32 2, ptr null)
+  %objsz = call i64 @llvm.objectsize.i64.p0(ptr %c, i1 false)
+  store i64 %objsz, ptr %m_a

   ; END perform a bunch of inevitable operations

   ; AND here's the assume:
-  %ptrint = ptrtoint i32* %a to i64
+  %ptrint = ptrtoint ptr %a to i64
   %maskedptr = and i64 %ptrint, 31
   %maskcond = icmp eq i64 %maskedptr, 0
   tail call void @llvm.assume(i1 %maskcond)
@@ -62,13 +58,13 @@ entry:
 @.str = private unnamed_addr constant [4 x i8] c"sth\00", section "llvm.metadata"
 @.str1 = private unnamed_addr constant [4 x i8] c"t.c\00", section "llvm.metadata"

-declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
-declare i32 @llvm.annotation.i32(i32, i8*, i8*, i32)
-declare i8* @llvm.ptr.annotation.p0i8(i8*, i8*, i8*, i32, i8*)
+declare i64 @llvm.objectsize.i64.p0(ptr, i1)
+declare i32 @llvm.annotation.i32(i32, ptr, ptr, i32)
+declare ptr @llvm.ptr.annotation.p0(ptr, ptr, ptr, i32, ptr)

-declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
-declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
+declare void @llvm.lifetime.end.p0(i64, ptr nocapture)

-declare {}* @llvm.invariant.start.p0i8(i64, i8* nocapture)
-declare void @llvm.invariant.end.p0i8({}*, i64, i8* nocapture)
+declare ptr @llvm.invariant.start.p0(i64, ptr nocapture)
+declare void @llvm.invariant.end.p0(ptr, i64, ptr nocapture)

 declare void @llvm.assume(i1)
diff --git a/llvm/test/Transforms/InstCombine/call-returned.ll b/llvm/test/Transforms/InstCombine/call-returned.ll
index 418856c9a508b..4cbcaad0cf012 100644
--- a/llvm/test/Transforms/InstCombine/call-returned.ll
+++ b/llvm/test/Transforms/InstCombine/call-returned.ll
@@ -2,8 +2,8 @@
 ; RUN: opt -S -passes=instcombine < %s | FileCheck %s

 declare i32 @passthru_i32(i32 returned)
-declare i8* @passthru_p8(i8* returned)
-declare i8* @passthru_p8_from_p32(i32* returned)
+declare ptr @passthru_p8(ptr returned)
+declare ptr @passthru_p8_from_p32(ptr returned)
 declare <8 x i8> @passthru_8i8v_from_2i32v(<2 x i32> returned)

 define i32 @returned_const_int_arg() {
@@ -15,32 +15,31 @@ define i32 @returned_const_int_arg() {
   ret i32 %x
 }

-define i8* @returned_const_ptr_arg() {
+define ptr @returned_const_ptr_arg() {
 ; CHECK-LABEL: @returned_const_ptr_arg(
-; CHECK-NEXT: [[X:%.*]] = call i8* @passthru_p8(i8* null)
-; CHECK-NEXT: ret i8* null
+; CHECK-NEXT: [[X:%.*]] = call ptr @passthru_p8(ptr null)
+; CHECK-NEXT: ret ptr null
 ;
-  %x = call i8* @passthru_p8(i8* null)
-  ret i8* %x
+  %x = call ptr @passthru_p8(ptr null)
+  ret ptr %x
 }

-define i8* @returned_const_ptr_arg_casted() {
+define ptr @returned_const_ptr_arg_casted() {
 ; CHECK-LABEL: @returned_const_ptr_arg_casted(
-; CHECK-NEXT: [[X:%.*]] = call i8* @passthru_p8_from_p32(i32* null)
-; CHECK-NEXT: ret i8* null
+; CHECK-NEXT: [[X:%.*]] = call ptr @passthru_p8_from_p32(ptr null)
+; CHECK-NEXT: ret ptr null
 ;
-  %x = call i8* @passthru_p8_from_p32(i32* null)
-  ret i8* %x
+  %x = call ptr @passthru_p8_from_p32(ptr null)
+  ret ptr %x
 }

-define i8* @returned_ptr_arg_casted(i32* %a) {
+define ptr @returned_ptr_arg_casted(ptr %a) {
 ; CHECK-LABEL: @returned_ptr_arg_casted(
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A:%.*]] to i8*
-; CHECK-NEXT: [[X:%.*]] = call i8* @passthru_p8_from_p32(i32* [[A]])
-; CHECK-NEXT: ret i8* [[TMP1]]
+; CHECK-NEXT: [[X:%.*]] = call ptr @passthru_p8_from_p32(ptr [[A:%.*]])
+; CHECK-NEXT: ret ptr [[A]]
 ;
-  %x = call i8* @passthru_p8_from_p32(i32* %a)
-  ret i8* %x
+  %x = call ptr @passthru_p8_from_p32(ptr %a)
+  ret ptr %x
 }

 @GV = constant <2 x i32> zeroinitializer
@@ -49,7 +48,7 @@ define <8 x i8> @returned_const_vec_arg_casted() {
 ; CHECK-NEXT: [[X:%.*]] = call <8 x i8> @passthru_8i8v_from_2i32v(<2 x i32> zeroinitializer)
 ; CHECK-NEXT: ret <8 x i8> zeroinitializer
 ;
-  %v = load <2 x i32>, <2 x i32>* @GV
+  %v = load <2 x i32>, ptr @GV
   %x = call <8 x i8> @passthru_8i8v_from_2i32v(<2 x i32> %v)
   ret <8 x i8> %x
 }
diff --git a/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll b/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
index 436c3c603e30c..d0714ca5f8165 100644
--- a/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
+++ b/llvm/test/Transforms/InstCombine/constant-fold-address-space-pointer.ll
@@ -15,62 +15,62 @@ target datalayout = "e-p:32:32:32-p1:64:64:64-p2:8:8:8-p3:16:16:16-p4:16:16:16-i
 ; Test constant folding of inttoptr (ptrtoint constantexpr)
 ; The intermediate integer size is the same as the pointer size
-define i32 addrspace(3)* @test_constant_fold_inttoptr_as_pointer_same_size() {
+define ptr addrspace(3) @test_constant_fold_inttoptr_as_pointer_same_size() {
 ; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_same_size(
-; CHECK-NEXT: ret i32 addrspace(3)* @const_zero_i32_as3
+; CHECK-NEXT: ret ptr addrspace(3) @const_zero_i32_as3
 ;
-  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i32
-  %y = inttoptr i32 %x to i32 addrspace(3)*
-  ret i32 addrspace(3)* %y
+  %x = ptrtoint ptr addrspace(3) @const_zero_i32_as3 to i32
+  %y = inttoptr i32 %x to ptr addrspace(3)
+  ret ptr addrspace(3) %y
 }

 ; The intermediate integer size is larger than the pointer size
-define i32 addrspace(2)* @test_constant_fold_inttoptr_as_pointer_smaller() {
+define ptr addrspace(2) @test_constant_fold_inttoptr_as_pointer_smaller() {
 ; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller(
-; CHECK-NEXT: ret i32 addrspace(2)* @const_zero_i32_as2
+; CHECK-NEXT: ret ptr addrspace(2) @const_zero_i32_as2
 ;
-  %x = ptrtoint i32 addrspace(2)* @const_zero_i32_as2 to i16
-  %y = inttoptr i16 %x to i32 addrspace(2)*
-  ret i32 addrspace(2)* %y
+  %x = ptrtoint ptr addrspace(2) @const_zero_i32_as2 to i16
+  %y = inttoptr i16 %x to ptr addrspace(2)
+  ret ptr addrspace(2) %y
 }

 ; Different address spaces that are the same size, but they are
 ; different so nothing should happen
-define i32 addrspace(4)* @test_constant_fold_inttoptr_as_pointer_smaller_different_as() {
+define ptr addrspace(4) @test_constant_fold_inttoptr_as_pointer_smaller_different_as() {
 ; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller_different_as(
-; CHECK-NEXT: ret i32 addrspace(4)* inttoptr (i16 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i16) to i32 addrspace(4)*)
+; CHECK-NEXT: ret ptr addrspace(4) inttoptr (i16 ptrtoint (ptr addrspace(3) @const_zero_i32_as3 to i16) to ptr addrspace(4))
 ;
-  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i16
-  %y = inttoptr i16 %x to i32 addrspace(4)*
-  ret i32 addrspace(4)* %y
+  %x = ptrtoint ptr addrspace(3) @const_zero_i32_as3 to i16
+  %y = inttoptr i16 %x to ptr addrspace(4)
+  ret ptr addrspace(4) %y
 }

 ; Make sure we don't introduce a bitcast between different sized
 ; address spaces when folding this
-define i32 addrspace(2)* @test_constant_fold_inttoptr_as_pointer_smaller_different_size_as() {
+define ptr addrspace(2) @test_constant_fold_inttoptr_as_pointer_smaller_different_size_as() {
 ; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_smaller_different_size_as(
-; CHECK-NEXT: ret i32 addrspace(2)* inttoptr (i32 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i32) to i32 addrspace(2)*)
+; CHECK-NEXT: ret ptr addrspace(2) inttoptr (i32 ptrtoint (ptr addrspace(3) @const_zero_i32_as3 to i32) to ptr addrspace(2))
 ;
-  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i32
-  %y = inttoptr i32 %x to i32 addrspace(2)*
-  ret i32 addrspace(2)* %y
+  %x = ptrtoint ptr addrspace(3) @const_zero_i32_as3 to i32
+  %y = inttoptr i32 %x to ptr addrspace(2)
+  ret ptr addrspace(2) %y
 }

 ; The intermediate integer size is too small, nothing should happen
-define i32 addrspace(3)* @test_constant_fold_inttoptr_as_pointer_larger() {
+define ptr addrspace(3) @test_constant_fold_inttoptr_as_pointer_larger() {
 ; CHECK-LABEL: @test_constant_fold_inttoptr_as_pointer_larger(
-; CHECK-NEXT: ret i32 addrspace(3)* inttoptr (i8 ptrtoint (i32 addrspace(3)* @const_zero_i32_as3 to i8) to i32 addrspace(3)*)
+; CHECK-NEXT: ret ptr addrspace(3) inttoptr (i8 ptrtoint (ptr addrspace(3) @const_zero_i32_as3 to i8) to ptr addrspace(3))
 ;
-  %x = ptrtoint i32 addrspace(3)* @const_zero_i32_as3 to i8
-  %y = inttoptr i8 %x to i32 addrspace(3)*
-  ret i32 addrspace(3)* %y
+  %x = ptrtoint ptr addrspace(3) @const_zero_i32_as3 to i8
+  %y = inttoptr i8 %x to ptr addrspace(3)
+  ret ptr addrspace(3) %y
 }

 define i8 @const_fold_ptrtoint() {
 ; CHECK-LABEL: @const_fold_ptrtoint(
 ; CHECK-NEXT: ret i8 4
 ;
-  ret i8 ptrtoint (i32 addrspace(2)* inttoptr (i4 4 to i32 addrspace(2)*) to i8)
+  ret i8 ptrtoint (ptr addrspace(2) inttoptr (i4 4 to ptr addrspace(2)) to i8)
 }

 ; Test that mask happens when the destination pointer is smaller than
@@ -79,7 +79,7 @@ define i8 @const_fold_ptrtoint_mask() {
 ; CHECK-LABEL: @const_fold_ptrtoint_mask(
 ; CHECK-NEXT: ret i8 1
 ;
-  ret i8 ptrtoint (i32 addrspace(3)* inttoptr (i32 257 to i32 addrspace(3)*) to i8)
+  ret i8 ptrtoint (ptr addrspace(3) inttoptr (i32 257 to ptr addrspace(3)) to i8)
 }

 ; Address space 0 is too small for the correct mask, should mask with
@@ -88,22 +88,22 @@ define i64 @const_fold_ptrtoint_mask_small_as0() {
 ; CHECK-LABEL: @const_fold_ptrtoint_mask_small_as0(
 ; CHECK-NEXT: ret i64 -1
 ;
-  ret i64 ptrtoint (i32 addrspace(1)* inttoptr (i128 -1 to i32 addrspace(1)*) to i64)
+  ret i64 ptrtoint (ptr addrspace(1) inttoptr (i128 -1 to ptr addrspace(1)) to i64)
 }

-define i32 addrspace(3)* @const_inttoptr() {
+define ptr addrspace(3) @const_inttoptr() {
 ; CHECK-LABEL: @const_inttoptr(
-; CHECK-NEXT: ret i32 addrspace(3)* inttoptr (i16 4 to i32 addrspace(3)*)
+; CHECK-NEXT: ret ptr addrspace(3) inttoptr (i16 4 to ptr addrspace(3))
 ;
-  %p = inttoptr i16 4 to i32 addrspace(3)*
-  ret i32 addrspace(3)* %p
+  %p = inttoptr i16 4 to ptr addrspace(3)
+  ret ptr addrspace(3) %p
 }

 define i16 @const_ptrtoint() {
 ; CHECK-LABEL: @const_ptrtoint(
-; CHECK-NEXT: ret i16 ptrtoint (i32 addrspace(3)* @g to i16)
+; CHECK-NEXT: ret i16 ptrtoint (ptr addrspace(3) @g to i16)
 ;
-  %i = ptrtoint i32 addrspace(3)* @g to i16
+  %i = ptrtoint ptr addrspace(3) @g to i16
   ret i16 %i
 }

@@ -111,14 +111,14 @@ define i16 @const_inttoptr_ptrtoint() {
 ; CHECK-LABEL: @const_inttoptr_ptrtoint(
 ; CHECK-NEXT: ret i16 9
 ;
-  ret i16 ptrtoint (i32 addrspace(3)* inttoptr (i16 9 to i32 addrspace(3)*) to i16)
+  ret i16 ptrtoint (ptr addrspace(3) inttoptr (i16 9 to ptr addrspace(3)) to i16)
 }

 define i1 @constant_fold_cmp_constantexpr_inttoptr() {
 ; CHECK-LABEL: @constant_fold_cmp_constantexpr_inttoptr(
 ; CHECK-NEXT: ret i1 true
 ;
-  %x = icmp eq i32 addrspace(3)* inttoptr (i16 0 to i32 addrspace(3)*), null
+  %x = icmp eq ptr addrspace(3) inttoptr (i16 0 to ptr addrspace(3)), null
   ret i1 %x
 }

@@ -126,23 +126,23 @@ define i1 @constant_fold_inttoptr_null(i16 %i) {
 ; CHECK-LABEL: @constant_fold_inttoptr_null(
 ; CHECK-NEXT: ret i1 false
 ;
-  %x = icmp eq i32 addrspace(3)* inttoptr (i16 99 to i32 addrspace(3)*), inttoptr (i16 0 to i32 addrspace(3)*)
+  %x = icmp eq ptr addrspace(3) inttoptr (i16 99 to ptr addrspace(3)), inttoptr (i16 0 to ptr addrspace(3))
   ret i1 %x
 }

 define i1 @constant_fold_ptrtoint_null() {
 ; CHECK-LABEL: @constant_fold_ptrtoint_null(
-; CHECK-NEXT: ret i1 icmp eq (i32 addrspace(3)* @g, i32 addrspace(3)* null)
+; CHECK-NEXT: ret i1 icmp eq (ptr addrspace(3) @g, ptr addrspace(3) null)
 ;
-  %x = icmp eq i16 ptrtoint (i32 addrspace(3)* @g to i16), ptrtoint (i32 addrspace(3)* null to i16)
+  %x = icmp eq i16 ptrtoint (ptr addrspace(3) @g to i16), ptrtoint (ptr addrspace(3) null to i16)
   ret i1 %x
 }

 define i1 @constant_fold_ptrtoint_null_2() {
 ; CHECK-LABEL: @constant_fold_ptrtoint_null_2(
-; CHECK-NEXT: ret i1 icmp eq (i32 addrspace(3)* @g, i32 addrspace(3)* null)
+; CHECK-NEXT: ret i1 icmp eq (ptr addrspace(3) @g, ptr addrspace(3) null)
 ;
-  %x = icmp eq i16 ptrtoint (i32 addrspace(3)* null to i16), ptrtoint (i32 addrspace(3)* @g to i16)
+  %x = icmp eq i16 ptrtoint (ptr addrspace(3) null to i16), ptrtoint (ptr addrspace(3) @g to i16)
   ret i1 %x
 }

@@ -150,7 +150,7 @@ define i1 @constant_fold_ptrtoint() {
 ; CHECK-LABEL: @constant_fold_ptrtoint(
 ; CHECK-NEXT: ret i1 true
 ;
-  %x = icmp eq i16 ptrtoint (i32 addrspace(3)* @g to i16), ptrtoint (i32 addrspace(3)* @g to i16)
+  %x = icmp eq i16 ptrtoint (ptr addrspace(3) @g to i16), ptrtoint (ptr addrspace(3) @g to i16)
   ret i1 %x
 }

@@ -158,7 +158,7 @@ define i1 @constant_fold_inttoptr() {
 ; CHECK-LABEL: @constant_fold_inttoptr(
 ; CHECK-NEXT: ret i1 false
 ;
-  %x = icmp eq i32 addrspace(3)* inttoptr (i16 99 to i32 addrspace(3)*), inttoptr (i16 27 to i32 addrspace(3)*)
+  %x = icmp eq ptr addrspace(3) inttoptr (i16 99 to ptr addrspace(3)), inttoptr (i16 27 to ptr addrspace(3))
   ret i1 %x
 }

@@ -167,28 +167,28 @@ define i1 @constant_fold_inttoptr() {

 define float @constant_fold_bitcast_ftoi_load() {
 ; CHECK-LABEL: @constant_fold_bitcast_ftoi_load(
-; CHECK-NEXT: [[A:%.*]] = load float, float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4
+; CHECK-NEXT: [[A:%.*]] = load float, ptr addrspace(3) @g, align 4
 ; CHECK-NEXT: ret float [[A]]
 ;
-  %a = load float, float addrspace(3)* bitcast (i32 addrspace(3)* @g to float addrspace(3)*), align 4
+  %a = load float, ptr addrspace(3) @g, align 4
   ret float %a
 }

 define i32 @constant_fold_bitcast_itof_load() {
 ; CHECK-LABEL: @constant_fold_bitcast_itof_load(
-; CHECK-NEXT: [[A:%.*]] = load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4
+; CHECK-NEXT: [[A:%.*]] = load i32, ptr addrspace(3) @g_float_as3, align 4
 ; CHECK-NEXT: ret i32 [[A]]
 ;
-  %a = load i32, i32 addrspace(3)* bitcast (float addrspace(3)* @g_float_as3 to i32 addrspace(3)*), align 4
+  %a = load i32, ptr addrspace(3) @g_float_as3, align 4
   ret i32 %a
 }

 define <4 x float> @constant_fold_bitcast_vector_as() {
 ; CHECK-LABEL: @constant_fold_bitcast_vector_as(
-; CHECK-NEXT: [[A:%.*]] = load <4 x float>, <4 x float> addrspace(3)* @g_v4f_as3, align 16
+; CHECK-NEXT: [[A:%.*]] = load <4 x float>, ptr addrspace(3) @g_v4f_as3, align 16
 ; CHECK-NEXT: ret <4 x float> [[A]]
 ;
-  %a = load <4 x float>, <4 x float> addrspace(3)* bitcast (<4 x i32> addrspace(3)* bitcast (<4 x float> addrspace(3)* @g_v4f_as3 to <4 x i32> addrspace(3)*) to <4 x float> addrspace(3)*), align 4
+  %a = load <4 x float>, ptr addrspace(3) @g_v4f_as3, align 4
   ret <4 x float> %a
 }

@@ -196,40 +196,38 @@ define <4 x float> @constant_fold_bitcast_vector_as() {

 define i32 @test_cast_gep_small_indices_as() {
 ; CHECK-LABEL: @test_cast_gep_small_indices_as(
-; CHECK-NEXT: [[X:%.*]] = load i32, i32 addrspace(3)* getelementptr inbounds ([10 x i32], [10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16
+; CHECK-NEXT: [[X:%.*]] = load i32, ptr addrspace(3) @i32_array_as3, align 16
 ; CHECK-NEXT: ret i32 [[X]]
 ;
-  %p = getelementptr [10 x i32], [10 x i32] addrspace(3)* @i32_array_as3, i7 0, i7 0
-  %x = load i32, i32 addrspace(3)* %p, align 4
+  %x = load i32, ptr addrspace(3) @i32_array_as3, align 4
   ret i32 %x
 }

-%struct.foo = type { float, float, [4 x i32], i32 addrspace(3)* }
+%struct.foo = type { float, float, [4 x i32], ptr addrspace(3) }

 @constant_fold_global_ptr = addrspace(3) global %struct.foo {
   float 0.0,
   float 0.0,
   [4 x i32] zeroinitializer,
-  i32 addrspace(3)* getelementptr ([10 x i32], [10 x i32] addrspace(3)* @i32_array_as3, i64 0, i64 0)
+  ptr addrspace(3) @i32_array_as3
 }

 define i32 @test_cast_gep_large_indices_as() {
 ; CHECK-LABEL: @test_cast_gep_large_indices_as(
-; CHECK-NEXT: [[X:%.*]] = load i32, i32 addrspace(3)* getelementptr inbounds ([10 x i32], [10 x i32] addrspace(3)* @i32_array_as3, i16 0, i16 0), align 16
+; CHECK-NEXT: [[X:%.*]] = load i32, ptr addrspace(3) @i32_array_as3, align 16
 ; CHECK-NEXT: ret i32 [[X]]
 ;
-  %p = getelementptr [10 x i32], [10 x i32] addrspace(3)* @i32_array_as3, i64 0, i64 0
-  %x = load i32, i32 addrspace(3)* %p, align 4
+  %x = load i32, ptr addrspace(3) @i32_array_as3, align 4
   ret i32 %x
 }

 define i32 @test_constant_cast_gep_struct_indices_as() {
 ; CHECK-LABEL: @test_constant_cast_gep_struct_indices_as(
-; CHECK-NEXT: [[Y:%.*]] = load i32, i32 addrspace(3)* getelementptr inbounds (%struct.foo, [[STRUCT_FOO:%.*]] addrspace(3)* @constant_fold_global_ptr, i16 0, i32 2, i16 2), align 16
+; CHECK-NEXT: [[Y:%.*]] = load i32, ptr addrspace(3) getelementptr inbounds (%struct.foo, ptr addrspace(3) @constant_fold_global_ptr, i16 0, i32 2, i16 2), align 16
 ; CHECK-NEXT: ret i32 [[Y]]
 ;
-  %x = getelementptr %struct.foo, %struct.foo addrspace(3)* @constant_fold_global_ptr, i18 0, i32 2, i12 2
-  %y = load i32, i32 addrspace(3)* %x, align 4
+  %x = getelementptr %struct.foo, ptr addrspace(3) @constant_fold_global_ptr, i18 0, i32 2, i12 2
+  %y = load i32, ptr addrspace(3) %x, align 4
   ret i32 %y
 }

@@ -239,8 +237,8 @@ define i32 @test_read_data_from_global_as3() {
 ; CHECK-LABEL: @test_read_data_from_global_as3(
 ; CHECK-NEXT: ret i32 2
 ;
-  %x = getelementptr [5 x i32], [5 x i32] addrspace(3)* @constant_data_as3, i32 0, i32 1
-  %y = load i32, i32 addrspace(3)* %x, align 4
+  %x = getelementptr [5 x i32], ptr addrspace(3) @constant_data_as3, i32 0, i32 1
+  %y = load i32, ptr addrspace(3) %x, align 4
   ret i32 %y
 }

@@ -249,16 +247,16 @@ define i32 @test_read_data_from_global_as3() {
 @c = addrspace(1) constant i32 34
 @d = addrspace(1) constant i32 99

-@ptr_array = addrspace(2) constant [4 x i32 addrspace(1)*] [ i32 addrspace(1)* @a, i32 addrspace(1)* @b, i32 addrspace(1)* @c, i32 addrspace(1)* @d]
-@indirect = addrspace(0) constant i32 addrspace(1)* addrspace(2)* getelementptr inbounds ([4 x i32 addrspace(1)*], [4 x i32 addrspace(1)*] addrspace(2)* @ptr_array, i1 0, i32 2)
+@ptr_array = addrspace(2) constant [4 x ptr addrspace(1)] [ ptr addrspace(1) @a, ptr addrspace(1) @b, ptr addrspace(1) @c, ptr addrspace(1) @d]
+@indirect = addrspace(0) constant ptr addrspace(2) getelementptr inbounds ([4 x ptr addrspace(1)], ptr addrspace(2) @ptr_array, i1 0, i32 2)

 define i32 @constant_through_array_as_ptrs() {
 ; CHECK-LABEL: @constant_through_array_as_ptrs(
 ; CHECK-NEXT: ret i32 34
 ;
-  %p = load i32 addrspace(1)* addrspace(2)*, i32 addrspace(1)* addrspace(2)* addrspace(0)* @indirect, align 4
-  %a = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %p, align 4
-  %b = load i32, i32 addrspace(1)* %a, align 4
+  %p = load ptr addrspace(2), ptr addrspace(0) @indirect, align 4
+  %a = load ptr addrspace(1), ptr addrspace(2) %p, align 4
+  %b = load i32, ptr addrspace(1) %a, align 4
   ret i32 %b
 }

@@ -266,11 +264,11 @@ define i32 @constant_through_array_as_ptrs() {
 define float @canonicalize_addrspacecast(i32 %i) {
 ; CHECK-LABEL: @canonicalize_addrspacecast(
-; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds float, float* addrspacecast (float addrspace(3)* bitcast ([0 x i8] addrspace(3)* @shared_mem to float addrspace(3)*) to float*), i32 [[I:%.*]]
-; CHECK-NEXT: [[V:%.*]] = load float, float* [[P]], align 4
+; CHECK-NEXT: [[P:%.*]] = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(3) @shared_mem to ptr), i32 [[I:%.*]]
+; CHECK-NEXT: [[V:%.*]] = load float, ptr [[P]], align 4
 ; CHECK-NEXT: ret float [[V]]
 ;
-  %p = getelementptr inbounds float, float* addrspacecast ([0 x i8] addrspace(3)* @shared_mem to float*), i32 %i
-  %v = load float, float* %p
+  %p = getelementptr inbounds float, ptr addrspacecast (ptr addrspace(3) @shared_mem to ptr), i32 %i
+  %v = load float, ptr %p
   ret float %v
 }
diff --git a/llvm/test/Transforms/InstCombine/deref-alloc-fns.ll b/llvm/test/Transforms/InstCombine/deref-alloc-fns.ll
index 872b34ce7ea59..43edba1eacba9 100644
--- a/llvm/test/Transforms/InstCombine/deref-alloc-fns.ll
+++ b/llvm/test/Transforms/InstCombine/deref-alloc-fns.ll
@@ -3,374 +3,374 @@
 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -passes=instcombine -S | FileCheck %s --check-prefixes=CHECK,GNU

-declare noalias i8* @malloc(i64) allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc"
-declare noalias i8* @calloc(i64, i64) allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc"
-declare noalias i8* @realloc(i8* nocapture, i64) allockind("realloc") allocsize(1) "alloc-family"="malloc"
-declare noalias nonnull i8* @_Znam(i64) ; throwing version of 'new'
-declare noalias nonnull i8* @_Znwm(i64) ; throwing version of 'new'
-declare noalias i8* @strdup(i8*)
-declare noalias i8* @aligned_alloc(i64 allocalign, i64) allockind("alloc,uninitialized,aligned") allocsize(1) "alloc-family"="malloc"
-declare noalias align 16 i8* @memalign(i64, i64)
+declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc"
+declare noalias ptr @calloc(i64, i64) allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc"
+declare noalias ptr @realloc(ptr nocapture, i64) allockind("realloc") allocsize(1) "alloc-family"="malloc"
+declare noalias nonnull ptr @_Znam(i64) ; throwing version of 'new'
+declare noalias nonnull ptr @_Znwm(i64) ; throwing version of 'new'
+declare noalias ptr @strdup(ptr)
+declare noalias ptr @aligned_alloc(i64 allocalign, i64) allockind("alloc,uninitialized,aligned") allocsize(1) "alloc-family"="malloc"
+declare noalias align 16 ptr @memalign(i64, i64)

 ; new[](unsigned int, align_val_t)
-declare noalias i8* @_ZnamSt11align_val_t(i64 %size, i64 %align)
+declare noalias ptr @_ZnamSt11align_val_t(i64 %size, i64 %align)

-declare i8* @my_malloc(i64) allocsize(0)
-declare i8* @my_calloc(i64, i64) allocsize(0, 1)
+declare ptr @my_malloc(i64) allocsize(0)
+declare ptr @my_calloc(i64, i64) allocsize(0, 1)

 @.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
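;
; (Illustration, not part of the original test: the declarations above carry
; the attributes that drive the folds below. With `allocsize(0)`, a call with
; a constant size argument lets InstCombine annotate the returned pointer,
; e.g. for a hypothetical allocator:)
;
;   declare noalias ptr @my_alloc(i64) allocsize(0)
;
;   define ptr @forty_bytes() {
;     %p = call ptr @my_alloc(i64 40)
;     ; becomes: %p = call dereferenceable_or_null(40) ptr @my_alloc(i64 40)
;     ret ptr %p
;   }
;
; Allocators declared `nonnull` (such as @_Znam) can take the stronger
; `dereferenceable(40)` instead, since their result is never null.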
"alloc-family"="malloc" +declare noalias align 16 ptr @memalign(i64, i64) ; new[](unsigned int, align_val_t) -declare noalias i8* @_ZnamSt11align_val_t(i64 %size, i64 %align) +declare noalias ptr @_ZnamSt11align_val_t(i64 %size, i64 %align) -declare i8* @my_malloc(i64) allocsize(0) -declare i8* @my_calloc(i64, i64) allocsize(0, 1) +declare ptr @my_malloc(i64) allocsize(0) +declare ptr @my_calloc(i64, i64) allocsize(0, 1) @.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1 -define noalias i8* @malloc_nonconstant_size(i64 %n) { +define noalias ptr @malloc_nonconstant_size(i64 %n) { ; CHECK-LABEL: @malloc_nonconstant_size( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @malloc(i64 [[N:%.*]]) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 [[N:%.*]]) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @malloc(i64 %n) - ret i8* %call + %call = tail call noalias ptr @malloc(i64 %n) + ret ptr %call } -define noalias i8* @malloc_constant_size() { +define noalias ptr @malloc_constant_size() { ; CHECK-LABEL: @malloc_constant_size( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(40) i8* @malloc(i64 40) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(40) ptr @malloc(i64 40) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @malloc(i64 40) - ret i8* %call + %call = tail call noalias ptr @malloc(i64 40) + ret ptr %call } -define noalias i8* @aligned_alloc_constant_size() { +define noalias ptr @aligned_alloc_constant_size() { ; CHECK-LABEL: @aligned_alloc_constant_size( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias align 32 dereferenceable_or_null(512) i8* @aligned_alloc(i64 32, i64 512) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias align 32 dereferenceable_or_null(512) ptr @aligned_alloc(i64 32, i64 512) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @aligned_alloc(i64 32, i64 512) - ret i8* %call + %call = tail call noalias ptr @aligned_alloc(i64 32, i64 512) + ret ptr %call } -define noalias i8* @aligned_alloc_unknown_size_nonzero(i1 %c) { +define noalias ptr @aligned_alloc_unknown_size_nonzero(i1 %c) { ; CHECK-LABEL: @aligned_alloc_unknown_size_nonzero( ; CHECK-NEXT: [[SIZE:%.*]] = select i1 [[C:%.*]], i64 64, i64 128 -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias align 32 i8* @aligned_alloc(i64 32, i64 [[SIZE]]) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias align 32 ptr @aligned_alloc(i64 32, i64 [[SIZE]]) +; CHECK-NEXT: ret ptr [[CALL]] ; %size = select i1 %c, i64 64, i64 128 - %call = tail call noalias i8* @aligned_alloc(i64 32, i64 %size) - ret i8* %call + %call = tail call noalias ptr @aligned_alloc(i64 32, i64 %size) + ret ptr %call } -define noalias i8* @aligned_alloc_unknown_size_possibly_zero(i1 %c) { +define noalias ptr @aligned_alloc_unknown_size_possibly_zero(i1 %c) { ; CHECK-LABEL: @aligned_alloc_unknown_size_possibly_zero( ; CHECK-NEXT: [[SIZE:%.*]] = select i1 [[C:%.*]], i64 64, i64 0 -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias align 32 i8* @aligned_alloc(i64 32, i64 [[SIZE]]) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias align 32 ptr @aligned_alloc(i64 32, i64 [[SIZE]]) +; CHECK-NEXT: ret ptr [[CALL]] ; %size = select i1 %c, i64 64, i64 0 - %call = tail call noalias i8* @aligned_alloc(i64 32, i64 %size) - ret i8* %call + %call = tail call noalias ptr @aligned_alloc(i64 32, i64 %size) 
+ ret ptr %call } -define noalias i8* @aligned_alloc_unknown_align(i64 %align) { +define noalias ptr @aligned_alloc_unknown_align(i64 %align) { ; CHECK-LABEL: @aligned_alloc_unknown_align( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(128) i8* @aligned_alloc(i64 [[ALIGN:%.*]], i64 128) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(128) ptr @aligned_alloc(i64 [[ALIGN:%.*]], i64 128) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @aligned_alloc(i64 %align, i64 128) - ret i8* %call + %call = tail call noalias ptr @aligned_alloc(i64 %align, i64 128) + ret ptr %call } -declare noalias i8* @foo(i8*, i8*, i8*) +declare noalias ptr @foo(ptr, ptr, ptr) -define noalias i8* @aligned_alloc_dynamic_args(i64 %align, i64 %size) { +define noalias ptr @aligned_alloc_dynamic_args(i64 %align, i64 %size) { ; CHECK-LABEL: @aligned_alloc_dynamic_args( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(1024) i8* @aligned_alloc(i64 [[ALIGN:%.*]], i64 1024) -; CHECK-NEXT: [[CALL_1:%.*]] = tail call noalias dereferenceable_or_null(1024) i8* @aligned_alloc(i64 0, i64 1024) -; CHECK-NEXT: [[CALL_2:%.*]] = tail call noalias align 32 i8* @aligned_alloc(i64 32, i64 [[SIZE:%.*]]) -; CHECK-NEXT: [[TMP1:%.*]] = call i8* @foo(i8* [[CALL]], i8* [[CALL_1]], i8* [[CALL_2]]) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(1024) ptr @aligned_alloc(i64 [[ALIGN:%.*]], i64 1024) +; CHECK-NEXT: [[CALL_1:%.*]] = tail call noalias dereferenceable_or_null(1024) ptr @aligned_alloc(i64 0, i64 1024) +; CHECK-NEXT: [[CALL_2:%.*]] = tail call noalias align 32 ptr @aligned_alloc(i64 32, i64 [[SIZE:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call ptr @foo(ptr [[CALL]], ptr [[CALL_1]], ptr [[CALL_2]]) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @aligned_alloc(i64 %align, i64 1024) - %call_1 = tail call noalias i8* @aligned_alloc(i64 0, i64 1024) - %call_2 = tail call noalias i8* @aligned_alloc(i64 32, i64 %size) + %call = tail call noalias ptr @aligned_alloc(i64 %align, i64 1024) + %call_1 = tail call noalias ptr @aligned_alloc(i64 0, i64 1024) + %call_2 = tail call noalias ptr @aligned_alloc(i64 32, i64 %size) - call i8* @foo(i8* %call, i8* %call_1, i8* %call_2) - ret i8* %call + call ptr @foo(ptr %call, ptr %call_1, ptr %call_2) + ret ptr %call } -define noalias i8* @memalign_constant_size() { +define noalias ptr @memalign_constant_size() { ; GNU-LABEL: @memalign_constant_size( -; GNU-NEXT: [[CALL:%.*]] = tail call noalias align 32 dereferenceable_or_null(512) i8* @memalign(i64 32, i64 512) -; GNU-NEXT: ret i8* [[CALL]] +; GNU-NEXT: [[CALL:%.*]] = tail call noalias align 32 dereferenceable_or_null(512) ptr @memalign(i64 32, i64 512) +; GNU-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @memalign(i64 32, i64 512) - ret i8* %call + %call = tail call noalias ptr @memalign(i64 32, i64 512) + ret ptr %call } -define noalias i8* @memalign_unknown_size_nonzero(i1 %c) { +define noalias ptr @memalign_unknown_size_nonzero(i1 %c) { ; GNU-LABEL: @memalign_unknown_size_nonzero( ; GNU-NEXT: [[SIZE:%.*]] = select i1 [[C:%.*]], i64 64, i64 128 -; GNU-NEXT: [[CALL:%.*]] = tail call noalias align 32 i8* @memalign(i64 32, i64 [[SIZE]]) -; GNU-NEXT: ret i8* [[CALL]] +; GNU-NEXT: [[CALL:%.*]] = tail call noalias align 32 ptr @memalign(i64 32, i64 [[SIZE]]) +; GNU-NEXT: ret ptr [[CALL]] ; %size = select i1 %c, i64 64, i64 128 - %call = tail call noalias i8* 
@memalign(i64 32, i64 %size) - ret i8* %call + %call = tail call noalias ptr @memalign(i64 32, i64 %size) + ret ptr %call } -define noalias i8* @memalign_unknown_size_possibly_zero(i1 %c) { +define noalias ptr @memalign_unknown_size_possibly_zero(i1 %c) { ; GNU-LABEL: @memalign_unknown_size_possibly_zero( ; GNU-NEXT: [[SIZE:%.*]] = select i1 [[C:%.*]], i64 64, i64 0 -; GNU-NEXT: [[CALL:%.*]] = tail call noalias align 32 i8* @memalign(i64 32, i64 [[SIZE]]) -; GNU-NEXT: ret i8* [[CALL]] +; GNU-NEXT: [[CALL:%.*]] = tail call noalias align 32 ptr @memalign(i64 32, i64 [[SIZE]]) +; GNU-NEXT: ret ptr [[CALL]] ; %size = select i1 %c, i64 64, i64 0 - %call = tail call noalias i8* @memalign(i64 32, i64 %size) - ret i8* %call + %call = tail call noalias ptr @memalign(i64 32, i64 %size) + ret ptr %call } -define noalias i8* @memalign_unknown_align(i64 %align) { +define noalias ptr @memalign_unknown_align(i64 %align) { ; GNU-LABEL: @memalign_unknown_align( -; GNU-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(128) i8* @memalign(i64 [[ALIGN:%.*]], i64 128) -; GNU-NEXT: ret i8* [[CALL]] +; GNU-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(128) ptr @memalign(i64 [[ALIGN:%.*]], i64 128) +; GNU-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @memalign(i64 %align, i64 128) - ret i8* %call + %call = tail call noalias ptr @memalign(i64 %align, i64 128) + ret ptr %call } -define noalias i8* @malloc_constant_size2() { +define noalias ptr @malloc_constant_size2() { ; CHECK-LABEL: @malloc_constant_size2( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(40) i8* @malloc(i64 40) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(40) ptr @malloc(i64 40) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias dereferenceable_or_null(80) i8* @malloc(i64 40) - ret i8* %call + %call = tail call noalias dereferenceable_or_null(80) ptr @malloc(i64 40) + ret ptr %call } -define noalias i8* @malloc_constant_size3() { +define noalias ptr @malloc_constant_size3() { ; CHECK-LABEL: @malloc_constant_size3( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable(80) dereferenceable_or_null(40) i8* @malloc(i64 40) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable(80) dereferenceable_or_null(40) ptr @malloc(i64 40) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias dereferenceable(80) i8* @malloc(i64 40) - ret i8* %call + %call = tail call noalias dereferenceable(80) ptr @malloc(i64 40) + ret ptr %call } -define noalias i8* @malloc_constant_zero_size() { +define noalias ptr @malloc_constant_zero_size() { ; CHECK-LABEL: @malloc_constant_zero_size( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @malloc(i64 0) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @malloc(i64 0) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @malloc(i64 0) - ret i8* %call + %call = tail call noalias ptr @malloc(i64 0) + ret ptr %call } -define noalias i8* @realloc_nonconstant_size(i8* %p, i64 %n) { +define noalias ptr @realloc_nonconstant_size(ptr %p, i64 %n) { ; CHECK-LABEL: @realloc_nonconstant_size( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @realloc(i8* [[P:%.*]], i64 [[N:%.*]]) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @realloc(ptr [[P:%.*]], i64 [[N:%.*]]) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @realloc(i8* %p, i64 %n) - ret i8* 
%call + %call = tail call noalias ptr @realloc(ptr %p, i64 %n) + ret ptr %call } -define noalias i8* @realloc_constant_zero_size(i8* %p) { +define noalias ptr @realloc_constant_zero_size(ptr %p) { ; CHECK-LABEL: @realloc_constant_zero_size( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @realloc(i8* [[P:%.*]], i64 0) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @realloc(ptr [[P:%.*]], i64 0) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @realloc(i8* %p, i64 0) - ret i8* %call + %call = tail call noalias ptr @realloc(ptr %p, i64 0) + ret ptr %call } -define noalias i8* @realloc_constant_size(i8* %p) { +define noalias ptr @realloc_constant_size(ptr %p) { ; CHECK-LABEL: @realloc_constant_size( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(40) i8* @realloc(i8* [[P:%.*]], i64 40) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(40) ptr @realloc(ptr [[P:%.*]], i64 40) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @realloc(i8* %p, i64 40) - ret i8* %call + %call = tail call noalias ptr @realloc(ptr %p, i64 40) + ret ptr %call } -define noalias i8* @calloc_nonconstant_size(i64 %n) { +define noalias ptr @calloc_nonconstant_size(i64 %n) { ; CHECK-LABEL: @calloc_nonconstant_size( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @calloc(i64 1, i64 [[N:%.*]]) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @calloc(i64 1, i64 [[N:%.*]]) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @calloc(i64 1, i64 %n) - ret i8* %call + %call = tail call noalias ptr @calloc(i64 1, i64 %n) + ret ptr %call } -define noalias i8* @calloc_nonconstant_size2(i64 %n) { +define noalias ptr @calloc_nonconstant_size2(i64 %n) { ; CHECK-LABEL: @calloc_nonconstant_size2( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @calloc(i64 [[N:%.*]], i64 0) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @calloc(i64 [[N:%.*]], i64 0) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @calloc(i64 %n, i64 0) - ret i8* %call + %call = tail call noalias ptr @calloc(i64 %n, i64 0) + ret ptr %call } -define noalias i8* @calloc_nonconstant_size3(i64 %n) { +define noalias ptr @calloc_nonconstant_size3(i64 %n) { ; CHECK-LABEL: @calloc_nonconstant_size3( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @calloc(i64 [[N:%.*]], i64 [[N]]) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @calloc(i64 [[N:%.*]], i64 [[N]]) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @calloc(i64 %n, i64 %n) - ret i8* %call + %call = tail call noalias ptr @calloc(i64 %n, i64 %n) + ret ptr %call } -define noalias i8* @calloc_constant_zero_size() { +define noalias ptr @calloc_constant_zero_size() { ; CHECK-LABEL: @calloc_constant_zero_size( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @calloc(i64 0, i64 0) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @calloc(i64 0, i64 0) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @calloc(i64 0, i64 0) - ret i8* %call + %call = tail call noalias ptr @calloc(i64 0, i64 0) + ret ptr %call } -define noalias i8* @calloc_constant_zero_size2(i64 %n) { +define noalias ptr @calloc_constant_zero_size2(i64 %n) { ; CHECK-LABEL: @calloc_constant_zero_size2( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @calloc(i64 [[N:%.*]], i64 0) -; CHECK-NEXT: ret 
i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @calloc(i64 [[N:%.*]], i64 0) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @calloc(i64 %n, i64 0) - ret i8* %call + %call = tail call noalias ptr @calloc(i64 %n, i64 0) + ret ptr %call } -define noalias i8* @calloc_constant_zero_size3(i64 %n) { +define noalias ptr @calloc_constant_zero_size3(i64 %n) { ; CHECK-LABEL: @calloc_constant_zero_size3( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @calloc(i64 0, i64 [[N:%.*]]) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @calloc(i64 0, i64 [[N:%.*]]) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @calloc(i64 0, i64 %n) - ret i8* %call + %call = tail call noalias ptr @calloc(i64 0, i64 %n) + ret ptr %call } -define noalias i8* @calloc_constant_zero_size4(i64 %n) { +define noalias ptr @calloc_constant_zero_size4(i64 %n) { ; CHECK-LABEL: @calloc_constant_zero_size4( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @calloc(i64 0, i64 1) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @calloc(i64 0, i64 1) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @calloc(i64 0, i64 1) - ret i8* %call + %call = tail call noalias ptr @calloc(i64 0, i64 1) + ret ptr %call } -define noalias i8* @calloc_constant_zero_size5(i64 %n) { +define noalias ptr @calloc_constant_zero_size5(i64 %n) { ; CHECK-LABEL: @calloc_constant_zero_size5( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @calloc(i64 1, i64 0) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @calloc(i64 1, i64 0) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @calloc(i64 1, i64 0) - ret i8* %call + %call = tail call noalias ptr @calloc(i64 1, i64 0) + ret ptr %call } -define noalias i8* @calloc_constant_size() { +define noalias ptr @calloc_constant_size() { ; CHECK-LABEL: @calloc_constant_size( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(128) i8* @calloc(i64 16, i64 8) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(128) ptr @calloc(i64 16, i64 8) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @calloc(i64 16, i64 8) - ret i8* %call + %call = tail call noalias ptr @calloc(i64 16, i64 8) + ret ptr %call } -define noalias i8* @calloc_constant_size_overflow() { +define noalias ptr @calloc_constant_size_overflow() { ; CHECK-LABEL: @calloc_constant_size_overflow( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @calloc(i64 2000000000000, i64 80000000000) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @calloc(i64 2000000000000, i64 80000000000) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @calloc(i64 2000000000000, i64 80000000000) - ret i8* %call + %call = tail call noalias ptr @calloc(i64 2000000000000, i64 80000000000) + ret ptr %call } -define noalias i8* @op_new_nonconstant_size(i64 %n) { +define noalias ptr @op_new_nonconstant_size(i64 %n) { ; CHECK-LABEL: @op_new_nonconstant_size( -; CHECK-NEXT: [[CALL:%.*]] = tail call i8* @_Znam(i64 [[N:%.*]]) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @_Znam(i64 [[N:%.*]]) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call i8* @_Znam(i64 %n) - ret i8* %call + %call = tail call ptr @_Znam(i64 %n) + ret ptr %call } -define noalias i8* @op_new_constant_size() { +define noalias ptr @op_new_constant_size() { ; 
CHECK-LABEL: @op_new_constant_size( -; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable(40) i8* @_Znam(i64 40) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable(40) ptr @_Znam(i64 40) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call i8* @_Znam(i64 40) - ret i8* %call + %call = tail call ptr @_Znam(i64 40) + ret ptr %call } -define noalias i8* @op_new_constant_size2() { +define noalias ptr @op_new_constant_size2() { ; CHECK-LABEL: @op_new_constant_size2( -; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable(40) i8* @_Znwm(i64 40) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable(40) ptr @_Znwm(i64 40) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call i8* @_Znwm(i64 40) - ret i8* %call + %call = tail call ptr @_Znwm(i64 40) + ret ptr %call } -define noalias i8* @op_new_constant_zero_size() { +define noalias ptr @op_new_constant_zero_size() { ; CHECK-LABEL: @op_new_constant_zero_size( -; CHECK-NEXT: [[CALL:%.*]] = tail call i8* @_Znam(i64 0) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call ptr @_Znam(i64 0) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call i8* @_Znam(i64 0) - ret i8* %call + %call = tail call ptr @_Znam(i64 0) + ret ptr %call } -define noalias i8* @strdup_constant_str() { +define noalias ptr @strdup_constant_str() { ; CHECK-LABEL: @strdup_constant_str( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(6) i8* @strdup(i8* nonnull getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0)) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(6) ptr @strdup(ptr nonnull @.str) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @strdup(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0)) - ret i8* %call + %call = tail call noalias ptr @strdup(ptr @.str) + ret ptr %call } -define noalias i8* @strdup_notconstant_str(i8 * %str) { +define noalias ptr @strdup_notconstant_str(ptr %str) { ; CHECK-LABEL: @strdup_notconstant_str( -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias i8* @strdup(i8* [[STR:%.*]]) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias ptr @strdup(ptr [[STR:%.*]]) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call noalias i8* @strdup(i8* %str) - ret i8* %call + %call = tail call noalias ptr @strdup(ptr %str) + ret ptr %call } ; OSS-Fuzz #23214 ; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=23214 -define noalias i8* @ossfuzz_23214() { +define noalias ptr @ossfuzz_23214() { ; CHECK-LABEL: @ossfuzz_23214( ; CHECK-NEXT: bb: -; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(512) i8* @aligned_alloc(i64 -9223372036854775808, i64 512) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call noalias dereferenceable_or_null(512) ptr @aligned_alloc(i64 -9223372036854775808, i64 512) +; CHECK-NEXT: ret ptr [[CALL]] ; bb: %and = and i64 -1, -9223372036854775808 - %call = tail call noalias i8* @aligned_alloc(i64 %and, i64 512) - ret i8* %call + %call = tail call noalias ptr @aligned_alloc(i64 %and, i64 512) + ret ptr %call } -define noalias i8* @op_new_align() { +define noalias ptr @op_new_align() { ; CHECK-LABEL: @op_new_align( -; CHECK-NEXT: [[CALL:%.*]] = tail call align 32 dereferenceable_or_null(32) i8* @_ZnamSt11align_val_t(i64 32, i64 32) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = tail call align 32 dereferenceable_or_null(32) ptr @_ZnamSt11align_val_t(i64 
32, i64 32) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = tail call i8* @_ZnamSt11align_val_t(i64 32, i64 32) - ret i8* %call + %call = tail call ptr @_ZnamSt11align_val_t(i64 32, i64 32) + ret ptr %call } -define i8* @my_malloc_constant_size() { +define ptr @my_malloc_constant_size() { ; CHECK-LABEL: @my_malloc_constant_size( -; CHECK-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(32) i8* @my_malloc(i64 32) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(32) ptr @my_malloc(i64 32) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = call i8* @my_malloc(i64 32) - ret i8* %call + %call = call ptr @my_malloc(i64 32) + ret ptr %call } -define i8* @my_calloc_constant_size() { +define ptr @my_calloc_constant_size() { ; CHECK-LABEL: @my_calloc_constant_size( -; CHECK-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(128) i8* @my_calloc(i64 32, i64 4) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = call dereferenceable_or_null(128) ptr @my_calloc(i64 32, i64 4) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = call i8* @my_calloc(i64 32, i64 4) - ret i8* %call + %call = call ptr @my_calloc(i64 32, i64 4) + ret ptr %call } diff --git a/llvm/test/Transforms/InstCombine/element-atomic-memintrins.ll b/llvm/test/Transforms/InstCombine/element-atomic-memintrins.ll index 84b3d8cc3e9b1..fc0a758d374d5 100644 --- a/llvm/test/Transforms/InstCombine/element-atomic-memintrins.ll +++ b/llvm/test/Transforms/InstCombine/element-atomic-memintrins.ll @@ -4,109 +4,100 @@ ;; ---- memset ----- ; Ensure 0-length memset is removed -define void @test_memset_zero_length(i8* %dest) { +define void @test_memset_zero_length(ptr %dest) { ; CHECK-LABEL: @test_memset_zero_length( ; CHECK-NEXT: ret void ; - call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 0, i32 1) + call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 1 %dest, i8 1, i32 0, i32 1) ret void } -define void @test_memset_to_store(i8* %dest) { +define void @test_memset_to_store(ptr %dest) { ; CHECK-LABEL: @test_memset_to_store( -; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 1 -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 1 [[DEST]], i8 1, i32 2, i32 1) -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 1 [[DEST]], i8 1, i32 4, i32 1) -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 1 [[DEST]], i8 1, i32 8, i32 1) -; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 1 [[DEST]], i8 1, i32 16, i32 1) +; CHECK-NEXT: store atomic i8 1, ptr [[DEST:%.*]] unordered, align 1 +; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i32(ptr nonnull align 1 [[DEST]], i8 1, i32 2, i32 1) +; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i32(ptr nonnull align 1 [[DEST]], i8 1, i32 4, i32 1) +; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i32(ptr nonnull align 1 [[DEST]], i8 1, i32 8, i32 1) +; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i32(ptr nonnull align 1 [[DEST]], i8 1, i32 16, i32 1) ; CHECK-NEXT: ret void ; - call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 1, i32 1) - call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 2, i32 1) - call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 1 %dest, i8 1, i32 4, i32 1) - call void 
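;
; (Note on the constants in the checks below, not part of the original test:
; the lowered stores splat the memset byte across the store width, so for the
; byte 0x01 a 2-byte store uses 0x0101 = 257, a 4-byte store uses
; 0x01010101 = 16843009, and an 8-byte store uses 0x0101010101010101 =
; 72340172838076673. For example:)
;
;   call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 4 %d, i8 1, i32 4, i32 4)
;   ; becomes:
;   store atomic i32 16843009, ptr %d unordered, align 4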
-define void @test_memset_to_store_2(i8* %dest) {
+define void @test_memset_to_store_2(ptr %dest) {
 ; CHECK-LABEL: @test_memset_to_store_2(
-; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 2
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
-; CHECK-NEXT: store atomic i16 257, i16* [[TMP1]] unordered, align 2
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 2 [[DEST]], i8 1, i32 4, i32 2)
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 2 [[DEST]], i8 1, i32 8, i32 2)
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 2 [[DEST]], i8 1, i32 16, i32 2)
+; CHECK-NEXT: store atomic i8 1, ptr [[DEST:%.*]] unordered, align 2
+; CHECK-NEXT: store atomic i16 257, ptr [[DEST]] unordered, align 2
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i32(ptr nonnull align 2 [[DEST]], i8 1, i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i32(ptr nonnull align 2 [[DEST]], i8 1, i32 8, i32 2)
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i32(ptr nonnull align 2 [[DEST]], i8 1, i32 16, i32 2)
 ; CHECK-NEXT: ret void
 ;
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 1, i32 1)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 2, i32 2)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 4, i32 2)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 8, i32 2)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 2 %dest, i8 1, i32 16, i32 2)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 2 %dest, i8 1, i32 1, i32 1)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 2 %dest, i8 1, i32 2, i32 2)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 2 %dest, i8 1, i32 4, i32 2)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 2 %dest, i8 1, i32 8, i32 2)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 2 %dest, i8 1, i32 16, i32 2)
   ret void
 }

-define void @test_memset_to_store_4(i8* %dest) {
+define void @test_memset_to_store_4(ptr %dest) {
 ; CHECK-LABEL: @test_memset_to_store_4(
-; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 4
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
-; CHECK-NEXT: store atomic i16 257, i16* [[TMP1]] unordered, align 4
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
-; CHECK-NEXT: store atomic i32 16843009, i32* [[TMP2]] unordered, align 4
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 4 [[DEST]], i8 1, i32 8, i32 4)
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 4 [[DEST]], i8 1, i32 16, i32 4)
+; CHECK-NEXT: store atomic i8 1, ptr [[DEST:%.*]] unordered, align 4
+; CHECK-NEXT: store atomic i16 257, ptr [[DEST]] unordered, align 4
+; CHECK-NEXT: store atomic i32 16843009, ptr [[DEST]] unordered, align 4
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i32(ptr nonnull align 4 [[DEST]], i8 1, i32 8, i32 4)
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i32(ptr nonnull align 4 [[DEST]], i8 1, i32 16, i32 4)
 ; CHECK-NEXT: ret void
 ;
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 1, i32 1)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 2, i32 2)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 4, i32 4)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 8, i32 4)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 4 %dest, i8 1, i32 16, i32 4)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 4 %dest, i8 1, i32 1, i32 1)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 4 %dest, i8 1, i32 2, i32 2)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 4 %dest, i8 1, i32 4, i32 4)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 4 %dest, i8 1, i32 8, i32 4)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 4 %dest, i8 1, i32 16, i32 4)
   ret void
 }

-define void @test_memset_to_store_8(i8* %dest) {
+define void @test_memset_to_store_8(ptr %dest) {
 ; CHECK-LABEL: @test_memset_to_store_8(
-; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 8
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
-; CHECK-NEXT: store atomic i16 257, i16* [[TMP1]] unordered, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
-; CHECK-NEXT: store atomic i32 16843009, i32* [[TMP2]] unordered, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i64*
-; CHECK-NEXT: store atomic i64 72340172838076673, i64* [[TMP3]] unordered, align 8
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 8 [[DEST]], i8 1, i32 16, i32 8)
+; CHECK-NEXT: store atomic i8 1, ptr [[DEST:%.*]] unordered, align 8
+; CHECK-NEXT: store atomic i16 257, ptr [[DEST]] unordered, align 8
+; CHECK-NEXT: store atomic i32 16843009, ptr [[DEST]] unordered, align 8
+; CHECK-NEXT: store atomic i64 72340172838076673, ptr [[DEST]] unordered, align 8
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i32(ptr nonnull align 8 [[DEST]], i8 1, i32 16, i32 8)
 ; CHECK-NEXT: ret void
 ;
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 1, i32 1)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 2, i32 2)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 4, i32 4)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 8, i32 8)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 8 %dest, i8 1, i32 16, i32 8)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 8 %dest, i8 1, i32 1, i32 1)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 8 %dest, i8 1, i32 2, i32 2)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 8 %dest, i8 1, i32 4, i32 4)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 8 %dest, i8 1, i32 8, i32 8)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 8 %dest, i8 1, i32 16, i32 8)
   ret void
 }

-define void @test_memset_to_store_16(i8* %dest) {
+define void @test_memset_to_store_16(ptr %dest) {
 ; CHECK-LABEL: @test_memset_to_store_16(
-; CHECK-NEXT: store atomic i8 1, i8* [[DEST:%.*]] unordered, align 16
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DEST]] to i16*
-; CHECK-NEXT: store atomic i16 257, i16* [[TMP1]] unordered, align 16
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[DEST]] to i32*
-; CHECK-NEXT: store atomic i32 16843009, i32* [[TMP2]] unordered, align 16
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i64*
-; CHECK-NEXT: store atomic i64 72340172838076673, i64* [[TMP3]] unordered, align 16
-; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nonnull align 16 [[DEST]], i8 1, i32 16, i32 16)
+; CHECK-NEXT: store atomic i8 1, ptr [[DEST:%.*]] unordered, align 16
+; CHECK-NEXT: store atomic i16 257, ptr [[DEST]] unordered, align 16
+; CHECK-NEXT: store atomic i32 16843009, ptr [[DEST]] unordered, align 16
+; CHECK-NEXT: store atomic i64 72340172838076673, ptr [[DEST]] unordered, align 16
+; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i32(ptr nonnull align 16 [[DEST]], i8 1, i32 16, i32 16)
 ; CHECK-NEXT: ret void
 ;
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 1, i32 1)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 2, i32 2)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 4, i32 4)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 8, i32 8)
-  call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 16, i32 16)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 16 %dest, i8 1, i32 1, i32 1)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 16 %dest, i8 1, i32 2, i32 2)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 16 %dest, i8 1, i32 4, i32 4)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 16 %dest, i8 1, i32 8, i32 8)
+  call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 16 %dest, i8 1, i32 16, i32 16)
   ret void
 }

-declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture writeonly, i8, i32, i32) nounwind argmemonly
+declare void @llvm.memset.element.unordered.atomic.p0.i32(ptr nocapture writeonly, i8, i32, i32) nounwind argmemonly

 ;; =========================================
@@ -115,312 +106,276 @@ declare void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* nocapture writeo
 @gconst = constant [32 x i8] c"0123456789012345678901234567890\00"

 ; Check that a memmove from a global constant is converted into a memcpy
-define void @test_memmove_to_memcpy(i8* %dest) {
+define void @test_memmove_to_memcpy(ptr %dest) {
 ; CHECK-LABEL: @test_memmove_to_memcpy(
-; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 [[DEST:%.*]], i8* nonnull align 16 getelementptr inbounds ([32 x i8], [32 x i8]* @gconst, i64 0, i64 0), i32 32, i32 1)
+; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 1 [[DEST:%.*]], ptr nonnull align 16 @gconst, i32 32, i32 1)
 ; CHECK-NEXT: ret void
 ;
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 getelementptr inbounds ([32 x i8], [32 x i8]* @gconst, i64 0, i64 0), i32 32, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 1 %dest, ptr align 1 @gconst, i32 32, i32 1)
   ret void
 }
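;
; (Why the fold above is sound, as this editor understands it and not stated
; in the test itself: the source is a constant global, and an overlapping
; destination would mean writing to constant memory, which is undefined, so
; the two ranges may be treated as disjoint and the memmove relaxed to a
; memcpy. A hypothetical non-atomic analogue of the same fold:)
;
;   @k = constant [4 x i8] c"abcd"
;   define void @move_from_const(ptr %dst) {
;     call void @llvm.memmove.p0.p0.i32(ptr %dst, ptr @k, i32 4, i1 false)
;     ; becomes: call void @llvm.memcpy.p0.p0.i32(ptr %dst, ptr @k, i32 4, i1 false)
;     ret void
;   }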
-define void @test_memmove_zero_length(i8* %dest, i8* %src) {
+define void @test_memmove_zero_length(ptr %dest, ptr %src) {
 ; CHECK-LABEL: @test_memmove_zero_length(
 ; CHECK-NEXT: ret void
 ;
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 0, i32 1)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 0, i32 2)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 0, i32 4)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 0, i32 8)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 0, i32 16)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 1 %dest, ptr align 1 %src, i32 0, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 2 %dest, ptr align 2 %src, i32 0, i32 2)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 0, i32 4)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 0, i32 8)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 0, i32 16)
   ret void
 }

 ; memmove with src==dest is removed
-define void @test_memmove_removed(i8* %srcdest, i32 %sz) {
+define void @test_memmove_removed(ptr %srcdest, i32 %sz) {
 ; CHECK-LABEL: @test_memmove_removed(
 ; CHECK-NEXT: ret void
 ;
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %srcdest, i8* align 1 %srcdest, i32 %sz, i32 1)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %srcdest, i8* align 2 %srcdest, i32 %sz, i32 2)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %srcdest, i8* align 4 %srcdest, i32 %sz, i32 4)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %srcdest, i8* align 8 %srcdest, i32 %sz, i32 8)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %srcdest, i8* align 16 %srcdest, i32 %sz, i32 16)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 1 %srcdest, ptr align 1 %srcdest, i32 %sz, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 2 %srcdest, ptr align 2 %srcdest, i32 %sz, i32 2)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 4 %srcdest, ptr align 4 %srcdest, i32 %sz, i32 4)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 8 %srcdest, ptr align 8 %srcdest, i32 %sz, i32 8)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 16 %srcdest, ptr align 16 %srcdest, i32 %sz, i32 16)
   ret void
 }

 ; memmove with a small constant length is converted to a load/store pair
-define void @test_memmove_loadstore(i8* %dest, i8* %src) {
+define void @test_memmove_loadstore(ptr %dest, ptr %src) {
 ; CHECK-LABEL: @test_memmove_loadstore(
-; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 1
-; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 1
-; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 2, i32 1)
-; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 4, i32 1)
-; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 8, i32 1)
-; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 16, i32 1)
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, ptr [[SRC:%.*]] unordered, align 1
+; CHECK-NEXT: store atomic i8 [[TMP1]], ptr [[DEST:%.*]] unordered, align 1
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr nonnull align 1 [[DEST]], ptr nonnull align 1 [[SRC]], i32 2, i32 1)
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr nonnull align 1 [[DEST]], ptr nonnull align 1 [[SRC]], i32 4, i32 1)
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr nonnull align 1 [[DEST]], ptr nonnull align 1 [[SRC]], i32 8, i32 1)
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr nonnull align 1 [[DEST]], ptr nonnull align 1 [[SRC]], i32 16, i32 1)
 ; CHECK-NEXT: ret void
 ;
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 1, i32 1)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 2, i32 1)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 4, i32 1)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 8, i32 1)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 16, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 1 %dest, ptr align 1 %src, i32 1, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 1 %dest, ptr align 1 %src, i32 2, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 1 %dest, ptr align 1 %src, i32 4, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 1 %dest, ptr align 1 %src, i32 8, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 1 %dest, ptr align 1 %src, i32 16, i32 1)
   ret void
 }

-define void @test_memmove_loadstore_2(i8* %dest, i8* %src) {
+define void @test_memmove_loadstore_2(ptr %dest, ptr %src) {
 ; CHECK-LABEL: @test_memmove_loadstore_2(
-; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 2
-; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 2
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
-; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 2
-; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 2
-; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 2 [[DEST]], i8* nonnull align 2 [[SRC]], i32 4, i32 2)
-; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 2 [[DEST]], i8* nonnull align 2 [[SRC]], i32 8, i32 2)
-; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 2 [[DEST]], i8* nonnull align 2 [[SRC]], i32 16, i32 2)
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, ptr [[SRC:%.*]] unordered, align 2
+; CHECK-NEXT: store atomic i8 [[TMP1]], ptr [[DEST:%.*]] unordered, align 2
+; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, ptr [[SRC]] unordered, align 2
+; CHECK-NEXT: store atomic i16 [[TMP4]], ptr [[DEST]] unordered, align 2
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr nonnull align 2 [[DEST]], ptr nonnull align 2 [[SRC]], i32 4, i32 2)
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr nonnull align 2 [[DEST]], ptr nonnull align 2 [[SRC]], i32 8, i32 2)
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr nonnull align 2 [[DEST]], ptr nonnull align 2 [[SRC]], i32 16, i32 2)
 ; CHECK-NEXT: ret void
 ;
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 1, i32 1)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 2, i32 2)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 4, i32 2)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 8, i32 2)
-  call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 16, i32 2)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 2 %dest, ptr align 2 %src, i32 1, i32 1)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 2 %dest, ptr align 2 %src, i32 2, i32 2)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 2 %dest, ptr align 2 %src, i32 4, i32 2)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 2 %dest, ptr align 2 %src, i32 8, i32 2)
+  call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 2 %dest, ptr align 2 %src, i32 16, i32 2)
   ret void
 }

-define void @test_memmove_loadstore_4(i8* %dest, i8* %src) {
+define void @test_memmove_loadstore_4(ptr %dest, ptr %src) {
 ; CHECK-LABEL: @test_memmove_loadstore_4(
-; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 4
-; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 4
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16*
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16*
-; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 4
-; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 4
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32*
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32*
-; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 4
-; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 4
-; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 4 [[DEST]], i8* nonnull align 4 [[SRC]], i32 8, i32 4)
-; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 4 [[DEST]], i8* nonnull align 4 [[SRC]], i32 16, i32 4)
+; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, ptr [[SRC:%.*]] unordered, align 4
+; CHECK-NEXT: store atomic i8 [[TMP1]], ptr [[DEST:%.*]] unordered, align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, ptr [[SRC]] unordered, align 4
+; CHECK-NEXT: store atomic i16 [[TMP4]], ptr [[DEST]] unordered, align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[SRC]] unordered, align 4
+; CHECK-NEXT: store atomic i32 [[TMP7]], ptr [[DEST]] unordered, align 4
+; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr nonnull align 4 [[DEST]], ptr nonnull align 4
[[SRC]], i32 8, i32 4) +; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr nonnull align 4 [[DEST]], ptr nonnull align 4 [[SRC]], i32 16, i32 4) ; CHECK-NEXT: ret void ; - call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 1, i32 1) - call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 2, i32 2) - call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 4, i32 4) - call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 8, i32 4) - call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 16, i32 4) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 1, i32 1) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 2, i32 2) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 4, i32 4) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 8, i32 4) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 16, i32 4) ret void } -define void @test_memmove_loadstore_8(i8* %dest, i8* %src) { +define void @test_memmove_loadstore_8(ptr %dest, ptr %src) { ; CHECK-LABEL: @test_memmove_loadstore_8( -; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 8 -; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16* -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16* -; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 8 -; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 8 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32* -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32* -; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 8 -; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 8 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64* -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64* -; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 8 -; CHECK-NEXT: store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 8 -; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 8 [[DEST]], i8* nonnull align 8 [[SRC]], i32 16, i32 8) +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, ptr [[SRC:%.*]] unordered, align 8 +; CHECK-NEXT: store atomic i8 [[TMP1]], ptr [[DEST:%.*]] unordered, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, ptr [[SRC]] unordered, align 8 +; CHECK-NEXT: store atomic i16 [[TMP4]], ptr [[DEST]] unordered, align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[SRC]] unordered, align 8 +; CHECK-NEXT: store atomic i32 [[TMP7]], ptr [[DEST]] unordered, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, ptr [[SRC]] unordered, align 8 +; CHECK-NEXT: store atomic i64 [[TMP10]], ptr [[DEST]] unordered, align 8 +; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr nonnull align 8 [[DEST]], ptr nonnull align 8 [[SRC]], i32 16, i32 8) ; CHECK-NEXT: ret void ; - call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 1, i32 1) - call 
void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 2, i32 2) - call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 4, i32 4) - call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 8, i32 8) - call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 16, i32 8) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 1, i32 1) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 2, i32 2) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 4, i32 4) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 8, i32 8) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 16, i32 8) ret void } -define void @test_memmove_loadstore_16(i8* %dest, i8* %src) { +define void @test_memmove_loadstore_16(ptr %dest, ptr %src) { ; CHECK-LABEL: @test_memmove_loadstore_16( -; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 16 -; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 16 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16* -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16* -; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 16 -; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 16 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32* -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32* -; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 16 -; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 16 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64* -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64* -; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 16 -; CHECK-NEXT: store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 16 -; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 16 [[DEST]], i8* nonnull align 16 [[SRC]], i32 16, i32 16) +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, ptr [[SRC:%.*]] unordered, align 16 +; CHECK-NEXT: store atomic i8 [[TMP1]], ptr [[DEST:%.*]] unordered, align 16 +; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, ptr [[SRC]] unordered, align 16 +; CHECK-NEXT: store atomic i16 [[TMP4]], ptr [[DEST]] unordered, align 16 +; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[SRC]] unordered, align 16 +; CHECK-NEXT: store atomic i32 [[TMP7]], ptr [[DEST]] unordered, align 16 +; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, ptr [[SRC]] unordered, align 16 +; CHECK-NEXT: store atomic i64 [[TMP10]], ptr [[DEST]] unordered, align 16 +; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr nonnull align 16 [[DEST]], ptr nonnull align 16 [[SRC]], i32 16, i32 16) ; CHECK-NEXT: ret void ; - call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 1, i32 1) - call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 2, i32 2) - call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 4, i32 4) - call void 
@llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 8, i32 8) - call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 16, i32 16) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 1, i32 1) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 2, i32 2) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 4, i32 4) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 8, i32 8) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 16, i32 16) ret void } -declare void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32) nounwind argmemonly +declare void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i32) nounwind argmemonly ;; ========================================= ;; ----- memcpy ------ -define void @test_memcpy_zero_length(i8* %dest, i8* %src) { +define void @test_memcpy_zero_length(ptr %dest, ptr %src) { ; CHECK-LABEL: @test_memcpy_zero_length( ; CHECK-NEXT: ret void ; - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 0, i32 1) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 0, i32 2) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 0, i32 4) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 0, i32 8) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 0, i32 16) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 1 %dest, ptr align 1 %src, i32 0, i32 1) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 2 %dest, ptr align 2 %src, i32 0, i32 2) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 0, i32 4) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 0, i32 8) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 0, i32 16) ret void } ; memcpy with src==dest is removed -define void @test_memcpy_removed(i8* %srcdest, i32 %sz) { +define void @test_memcpy_removed(ptr %srcdest, i32 %sz) { ; CHECK-LABEL: @test_memcpy_removed( ; CHECK-NEXT: ret void ; - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %srcdest, i8* align 1 %srcdest, i32 %sz, i32 1) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %srcdest, i8* align 2 %srcdest, i32 %sz, i32 2) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %srcdest, i8* align 4 %srcdest, i32 %sz, i32 4) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %srcdest, i8* align 8 %srcdest, i32 %sz, i32 8) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %srcdest, i8* align 16 %srcdest, i32 %sz, i32 16) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 1 %srcdest, ptr align 1 %srcdest, i32 %sz, i32 1) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 2 %srcdest, ptr align 2 
%srcdest, i32 %sz, i32 2) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 4 %srcdest, ptr align 4 %srcdest, i32 %sz, i32 4) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 8 %srcdest, ptr align 8 %srcdest, i32 %sz, i32 8) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 16 %srcdest, ptr align 16 %srcdest, i32 %sz, i32 16) ret void } ; memcpy with a small constant length is converted to a load/store pair -define void @test_memcpy_loadstore(i8* %dest, i8* %src) { +define void @test_memcpy_loadstore(ptr %dest, ptr %src) { ; CHECK-LABEL: @test_memcpy_loadstore( -; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 1 -; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 1 -; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 2, i32 1) -; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 4, i32 1) -; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 8, i32 1) -; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 1 [[DEST]], i8* nonnull align 1 [[SRC]], i32 16, i32 1) +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, ptr [[SRC:%.*]] unordered, align 1 +; CHECK-NEXT: store atomic i8 [[TMP1]], ptr [[DEST:%.*]] unordered, align 1 +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr nonnull align 1 [[DEST]], ptr nonnull align 1 [[SRC]], i32 2, i32 1) +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr nonnull align 1 [[DEST]], ptr nonnull align 1 [[SRC]], i32 4, i32 1) +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr nonnull align 1 [[DEST]], ptr nonnull align 1 [[SRC]], i32 8, i32 1) +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr nonnull align 1 [[DEST]], ptr nonnull align 1 [[SRC]], i32 16, i32 1) ; CHECK-NEXT: ret void ; - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 1, i32 1) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 2, i32 1) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 4, i32 1) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 8, i32 1) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 1 %dest, i8* align 1 %src, i32 16, i32 1) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 1 %dest, ptr align 1 %src, i32 1, i32 1) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 1 %dest, ptr align 1 %src, i32 2, i32 1) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 1 %dest, ptr align 1 %src, i32 4, i32 1) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 1 %dest, ptr align 1 %src, i32 8, i32 1) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 1 %dest, ptr align 1 %src, i32 16, i32 1) ret void } -define void @test_memcpy_loadstore_2(i8* %dest, i8* %src) { +define void @test_memcpy_loadstore_2(ptr %dest, ptr %src) { ; CHECK-LABEL: @test_memcpy_loadstore_2( -; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 2 -; 
CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 2 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16* -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16* -; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 2 -; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 2 -; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 2 [[DEST]], i8* nonnull align 2 [[SRC]], i32 4, i32 2) -; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 2 [[DEST]], i8* nonnull align 2 [[SRC]], i32 8, i32 2) -; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 2 [[DEST]], i8* nonnull align 2 [[SRC]], i32 16, i32 2) +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, ptr [[SRC:%.*]] unordered, align 2 +; CHECK-NEXT: store atomic i8 [[TMP1]], ptr [[DEST:%.*]] unordered, align 2 +; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, ptr [[SRC]] unordered, align 2 +; CHECK-NEXT: store atomic i16 [[TMP4]], ptr [[DEST]] unordered, align 2 +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr nonnull align 2 [[DEST]], ptr nonnull align 2 [[SRC]], i32 4, i32 2) +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr nonnull align 2 [[DEST]], ptr nonnull align 2 [[SRC]], i32 8, i32 2) +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr nonnull align 2 [[DEST]], ptr nonnull align 2 [[SRC]], i32 16, i32 2) ; CHECK-NEXT: ret void ; - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 1, i32 1) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 2, i32 2) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 4, i32 2) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 8, i32 2) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 2 %dest, i8* align 2 %src, i32 16, i32 2) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 2 %dest, ptr align 2 %src, i32 1, i32 1) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 2 %dest, ptr align 2 %src, i32 2, i32 2) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 2 %dest, ptr align 2 %src, i32 4, i32 2) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 2 %dest, ptr align 2 %src, i32 8, i32 2) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 2 %dest, ptr align 2 %src, i32 16, i32 2) ret void } -define void @test_memcpy_loadstore_4(i8* %dest, i8* %src) { +define void @test_memcpy_loadstore_4(ptr %dest, ptr %src) { ; CHECK-LABEL: @test_memcpy_loadstore_4( -; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 4 -; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 4 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16* -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16* -; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 4 -; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 4 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32* -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32* -; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 4 -; CHECK-NEXT: store atomic i32 [[TMP7]], i32* 
[[TMP6]] unordered, align 4 -; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 4 [[DEST]], i8* nonnull align 4 [[SRC]], i32 8, i32 4) -; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 4 [[DEST]], i8* nonnull align 4 [[SRC]], i32 16, i32 4) +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, ptr [[SRC:%.*]] unordered, align 4 +; CHECK-NEXT: store atomic i8 [[TMP1]], ptr [[DEST:%.*]] unordered, align 4 +; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, ptr [[SRC]] unordered, align 4 +; CHECK-NEXT: store atomic i16 [[TMP4]], ptr [[DEST]] unordered, align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[SRC]] unordered, align 4 +; CHECK-NEXT: store atomic i32 [[TMP7]], ptr [[DEST]] unordered, align 4 +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr nonnull align 4 [[DEST]], ptr nonnull align 4 [[SRC]], i32 8, i32 4) +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr nonnull align 4 [[DEST]], ptr nonnull align 4 [[SRC]], i32 16, i32 4) ; CHECK-NEXT: ret void ; - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 1, i32 1) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 2, i32 2) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 4, i32 4) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 8, i32 4) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 4 %dest, i8* align 4 %src, i32 16, i32 4) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 1, i32 1) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 2, i32 2) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 4, i32 4) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 8, i32 4) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 4 %dest, ptr align 4 %src, i32 16, i32 4) ret void } -define void @test_memcpy_loadstore_8(i8* %dest, i8* %src) { +define void @test_memcpy_loadstore_8(ptr %dest, ptr %src) { ; CHECK-LABEL: @test_memcpy_loadstore_8( -; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 8 -; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16* -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16* -; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 8 -; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 8 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32* -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32* -; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 8 -; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 8 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64* -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64* -; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 8 -; CHECK-NEXT: store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 8 -; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 8 [[DEST]], i8* nonnull align 8 [[SRC]], i32 16, i32 8) +; 
CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, ptr [[SRC:%.*]] unordered, align 8 +; CHECK-NEXT: store atomic i8 [[TMP1]], ptr [[DEST:%.*]] unordered, align 8 +; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, ptr [[SRC]] unordered, align 8 +; CHECK-NEXT: store atomic i16 [[TMP4]], ptr [[DEST]] unordered, align 8 +; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[SRC]] unordered, align 8 +; CHECK-NEXT: store atomic i32 [[TMP7]], ptr [[DEST]] unordered, align 8 +; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, ptr [[SRC]] unordered, align 8 +; CHECK-NEXT: store atomic i64 [[TMP10]], ptr [[DEST]] unordered, align 8 +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr nonnull align 8 [[DEST]], ptr nonnull align 8 [[SRC]], i32 16, i32 8) ; CHECK-NEXT: ret void ; - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 1, i32 1) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 2, i32 2) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 4, i32 4) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 8, i32 8) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 8 %dest, i8* align 8 %src, i32 16, i32 8) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 1, i32 1) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 2, i32 2) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 4, i32 4) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 8, i32 8) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 8 %dest, ptr align 8 %src, i32 16, i32 8) ret void } -define void @test_memcpy_loadstore_16(i8* %dest, i8* %src) { +define void @test_memcpy_loadstore_16(ptr %dest, ptr %src) { ; CHECK-LABEL: @test_memcpy_loadstore_16( -; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, i8* [[SRC:%.*]] unordered, align 16 -; CHECK-NEXT: store atomic i8 [[TMP1]], i8* [[DEST:%.*]] unordered, align 16 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SRC]] to i16* -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[DEST]] to i16* -; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, i16* [[TMP2]] unordered, align 16 -; CHECK-NEXT: store atomic i16 [[TMP4]], i16* [[TMP3]] unordered, align 16 -; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8* [[SRC]] to i32* -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[DEST]] to i32* -; CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, i32* [[TMP5]] unordered, align 16 -; CHECK-NEXT: store atomic i32 [[TMP7]], i32* [[TMP6]] unordered, align 16 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[SRC]] to i64* -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[DEST]] to i64* -; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, i64* [[TMP8]] unordered, align 16 -; CHECK-NEXT: store atomic i64 [[TMP10]], i64* [[TMP9]] unordered, align 16 -; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nonnull align 16 [[DEST]], i8* nonnull align 16 [[SRC]], i32 16, i32 16) +; CHECK-NEXT: [[TMP1:%.*]] = load atomic i8, ptr [[SRC:%.*]] unordered, align 16 +; CHECK-NEXT: store atomic i8 [[TMP1]], ptr [[DEST:%.*]] unordered, align 16 +; CHECK-NEXT: [[TMP4:%.*]] = load atomic i16, ptr [[SRC]] unordered, align 16 +; CHECK-NEXT: store atomic i16 [[TMP4]], ptr [[DEST]] unordered, align 16 +; 
CHECK-NEXT: [[TMP7:%.*]] = load atomic i32, ptr [[SRC]] unordered, align 16 +; CHECK-NEXT: store atomic i32 [[TMP7]], ptr [[DEST]] unordered, align 16 +; CHECK-NEXT: [[TMP10:%.*]] = load atomic i64, ptr [[SRC]] unordered, align 16 +; CHECK-NEXT: store atomic i64 [[TMP10]], ptr [[DEST]] unordered, align 16 +; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr nonnull align 16 [[DEST]], ptr nonnull align 16 [[SRC]], i32 16, i32 16) ; CHECK-NEXT: ret void ; - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 1, i32 1) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 2, i32 2) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 4, i32 4) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 8, i32 8) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 16, i32 16) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 1, i32 1) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 2, i32 2) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 4, i32 4) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 8, i32 8) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 16, i32 16) ret void } -define void @test_undefined(i8* %dest, i8* %src, i1 %c1) { +define void @test_undefined(ptr %dest, ptr %src, i1 %c1) { ; CHECK-LABEL: @test_undefined( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[C1:%.*]], label [[OK:%.*]], label [[UNDEFINED:%.*]] ; CHECK: undefined: -; CHECK-NEXT: store i1 true, i1* poison, align 1 +; CHECK-NEXT: store i1 true, ptr poison, align 1 ; CHECK-NEXT: br label [[OK]] ; CHECK: ok: ; CHECK-NEXT: ret void @@ -428,15 +383,15 @@ define void @test_undefined(i8* %dest, i8* %src, i1 %c1) { entry: br i1 %c1, label %ok, label %undefined undefined: - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 7, i32 4) - call void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 -8, i32 4) - call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 7, i32 4) - call void @llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32(i8* align 16 %dest, i8* align 16 %src, i32 -8, i32 4) - call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 7, i32 4) - call void @llvm.memset.element.unordered.atomic.p0i8.i32(i8* align 16 %dest, i8 1, i32 -8, i32 4) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 7, i32 4) + call void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 -8, i32 4) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 7, i32 4) + call void @llvm.memmove.element.unordered.atomic.p0.p0.i32(ptr align 16 %dest, ptr align 16 %src, i32 -8, i32 4) + call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 16 %dest, i8 1, i32 7, i32 4) + call void @llvm.memset.element.unordered.atomic.p0.i32(ptr align 16 %dest, i8 1, i32 -8, i32 4) br label %ok 
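; Note on the block above (a reading of the CHECK lines, not part of the test
; input): each of the six element-wise atomic calls is immediately undefined,
; since the length operand of these intrinsics must be a non-negative multiple
; of the element size, and 7 is not a multiple of 4 (the -8 variants are
; presumably rejected on the same grounds as a negative byte count). Per the
; CHECK lines for @test_undefined, InstCombine is expected to fold the whole
; block down to its usual non-terminator "unreachable" marker:
;   undefined:
;     store i1 true, ptr poison, align 1   ; UB marker replacing the calls
;     br label %ok
; i.e. the bogus calls are deleted rather than lowered to loads and stores.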
ok: ret void } -declare void @llvm.memcpy.element.unordered.atomic.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i32) nounwind argmemonly +declare void @llvm.memcpy.element.unordered.atomic.p0.p0.i32(ptr nocapture writeonly, ptr nocapture readonly, i32, i32) nounwind argmemonly diff --git a/llvm/test/Transforms/InstCombine/err-rep-cold.ll b/llvm/test/Transforms/InstCombine/err-rep-cold.ll index 3f69437c01d10..26df928e5c72a 100644 --- a/llvm/test/Transforms/InstCombine/err-rep-cold.ll +++ b/llvm/test/Transforms/InstCombine/err-rep-cold.ll @@ -3,11 +3,11 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } -%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } +%struct._IO_FILE = type { i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, i64, i16, i8, [1 x i8], ptr, i64, ptr, ptr, ptr, ptr, i64, i32, [20 x i8] } +%struct._IO_marker = type { ptr, ptr, i32 } -@stdout = external global %struct._IO_FILE* -@stderr = external global %struct._IO_FILE* +@stdout = external global ptr +@stderr = external global ptr @.str = private unnamed_addr constant [13 x i8] c"an error: %d\00", align 1 @.str1 = private unnamed_addr constant [9 x i8] c"an error\00", align 1 @@ -18,18 +18,18 @@ entry: br i1 %cmp, label %if.then, label %return if.then: ; preds = %entry - %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i64 0, i64 0), i32 %a) #1 + %0 = load ptr, ptr @stderr, align 8 + %call = tail call i32 (ptr, ptr, ...) @fprintf(ptr %0, ptr @.str, i32 %a) #1 br label %return -; CHECK: %call = tail call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* nonnull getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i64 0, i64 0), i32 %a) #[[$AT1:[0-9]+]] +; CHECK: %call = tail call i32 (ptr, ptr, ...) @fprintf(ptr %0, ptr nonnull @.str, i32 %a) #[[$AT1:[0-9]+]] return: ; preds = %entry, %if.then %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ] ret i32 %retval.0 } -declare i32 @fprintf(%struct._IO_FILE* nocapture, i8* nocapture readonly, ...) #1 +declare i32 @fprintf(ptr nocapture, ptr nocapture readonly, ...) 
#1 define i32 @test2(i32 %a) #0 { ; CHECK-LABEL: @test2 @@ -38,18 +38,18 @@ entry: br i1 %cmp, label %if.then, label %return if.then: ; preds = %entry - %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) + %0 = load ptr, ptr @stderr, align 8 + %1 = tail call i64 @fwrite(ptr @.str1, i64 8, i64 1, ptr %0) br label %return -; CHECK: tail call i64 @fwrite(i8* nonnull getelementptr inbounds ([9 x i8], [9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) #[[$AT2:[0-9]+]] +; CHECK: tail call i64 @fwrite(ptr nonnull @.str1, i64 8, i64 1, ptr %0) #[[$AT2:[0-9]+]] return: ; preds = %entry, %if.then %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ] ret i32 %retval.0 } -declare i64 @fwrite(i8* nocapture, i64, i64, %struct._IO_FILE* nocapture) #1 +declare i64 @fwrite(ptr nocapture, i64, i64, ptr nocapture) #1 define i32 @test3(i32 %a) #0 { ; CHECK-LABEL: @test3 @@ -58,11 +58,11 @@ entry: br i1 %cmp, label %if.then, label %return if.then: ; preds = %entry - %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stdout, align 8 - %1 = tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) + %0 = load ptr, ptr @stdout, align 8 + %1 = tail call i64 @fwrite(ptr @.str1, i64 8, i64 1, ptr %0) br label %return -; CHECK-NOT: tail call i64 @fwrite(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str1, i64 0, i64 0), i64 8, i64 1, %struct._IO_FILE* %0) #[[$AT2]] +; CHECK-NOT: tail call i64 @fwrite(ptr @.str1, i64 8, i64 1, ptr %0) #[[$AT2]] return: ; preds = %entry, %if.then %retval.0 = phi i32 [ 1, %if.then ], [ 0, %entry ] diff --git a/llvm/test/Transforms/InstCombine/fortify-folding.ll b/llvm/test/Transforms/InstCombine/fortify-folding.ll index 5d698ffa3df04..a6b5dc90c3640 100644 --- a/llvm/test/Transforms/InstCombine/fortify-folding.ll +++ b/llvm/test/Transforms/InstCombine/fortify-folding.ll @@ -7,359 +7,299 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @b = common global [60 x i8] zeroinitializer, align 1 @.str = private constant [12 x i8] c"abcdefghijk\00" -%struct.__va_list_tag = type { i32, i32, i8*, i8* } +%struct.__va_list_tag = type { i32, i32, ptr, ptr } -define i8* @test_memccpy() { +define ptr @test_memccpy() { ; CHECK-LABEL: @test_memccpy( -; CHECK-NEXT: [[MEMCCPY:%.*]] = call i8* @memccpy(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i32 0, i64 60) -; CHECK-NEXT: ret i8* [[MEMCCPY]] +; CHECK-NEXT: [[MEMCCPY:%.*]] = call ptr @memccpy(ptr nonnull @a, ptr nonnull @b, i32 0, i64 60) +; CHECK-NEXT: ret ptr [[MEMCCPY]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i8* @__memccpy_chk(i8* %dst, i8* %src, i32 0, i64 60, i64 -1) - ret i8* %ret + %ret = call ptr @__memccpy_chk(ptr @a, ptr @b, i32 0, i64 60, i64 -1) + ret ptr %ret } -define i8* @test_not_memccpy() { +define ptr @test_not_memccpy() { ; CHECK-LABEL: @test_not_memccpy( -; CHECK-NEXT: [[RET:%.*]] = call i8* @__memccpy_chk(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i32 0, i64 60, i64 59) -; CHECK-NEXT: ret i8* [[RET]] +; CHECK-NEXT: [[RET:%.*]] = call ptr @__memccpy_chk(ptr 
nonnull @a, ptr nonnull @b, i32 0, i64 60, i64 59) +; CHECK-NEXT: ret ptr [[RET]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i8* @__memccpy_chk(i8* %dst, i8* %src, i32 0, i64 60, i64 59) - ret i8* %ret + %ret = call ptr @__memccpy_chk(ptr @a, ptr @b, i32 0, i64 60, i64 59) + ret ptr %ret } -define i8* @test_memccpy_tail() { +define ptr @test_memccpy_tail() { ; CHECK-LABEL: @test_memccpy_tail( -; CHECK-NEXT: [[MEMCCPY:%.*]] = tail call i8* @memccpy(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i32 0, i64 60) -; CHECK-NEXT: ret i8* [[MEMCCPY]] +; CHECK-NEXT: [[MEMCCPY:%.*]] = tail call ptr @memccpy(ptr nonnull @a, ptr nonnull @b, i32 0, i64 60) +; CHECK-NEXT: ret ptr [[MEMCCPY]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = tail call i8* @__memccpy_chk(i8* %dst, i8* %src, i32 0, i64 60, i64 -1) - ret i8* %ret + %ret = tail call ptr @__memccpy_chk(ptr @a, ptr @b, i32 0, i64 60, i64 -1) + ret ptr %ret } -define i8* @test_mempcpy() { +define ptr @test_mempcpy() { ; CHECK-LABEL: @test_mempcpy( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 15, i1 false) -; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 15) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(15) @a, ptr noundef nonnull align 1 dereferenceable(15) @b, i64 15, i1 false) +; CHECK-NEXT: ret ptr getelementptr inbounds ([60 x i8], ptr @a, i64 0, i64 15) ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i8* @__mempcpy_chk(i8* %dst, i8* %src, i64 15, i64 -1) - ret i8* %ret + %ret = call ptr @__mempcpy_chk(ptr @a, ptr @b, i64 15, i64 -1) + ret ptr %ret } -define i8* @test_not_mempcpy() { +define ptr @test_not_mempcpy() { ; CHECK-LABEL: @test_not_mempcpy( -; CHECK-NEXT: [[RET:%.*]] = call i8* @__mempcpy_chk(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 60, i64 59) -; CHECK-NEXT: ret i8* [[RET]] +; CHECK-NEXT: [[RET:%.*]] = call ptr @__mempcpy_chk(ptr nonnull @a, ptr nonnull @b, i64 60, i64 59) +; CHECK-NEXT: ret ptr [[RET]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i8* @__mempcpy_chk(i8* %dst, i8* %src, i64 60, i64 59) - ret i8* %ret + %ret = call ptr @__mempcpy_chk(ptr @a, ptr @b, i64 60, i64 59) + ret ptr %ret } -define i8* @test_mempcpy_tail() { +define ptr @test_mempcpy_tail() { ; CHECK-LABEL: @test_mempcpy_tail( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull align 1 dereferenceable(15) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 15, i1 false) -; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, 
i64 0, i64 15) +; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(15) @a, ptr noundef nonnull align 1 dereferenceable(15) @b, i64 15, i1 false) +; CHECK-NEXT: ret ptr getelementptr inbounds ([60 x i8], ptr @a, i64 0, i64 15) ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = tail call i8* @__mempcpy_chk(i8* %dst, i8* %src, i64 15, i64 -1) - ret i8* %ret + %ret = tail call ptr @__mempcpy_chk(ptr @a, ptr @b, i64 15, i64 -1) + ret ptr %ret } define i32 @test_snprintf() { ; CHECK-LABEL: @test_snprintf( -; CHECK-NEXT: [[SNPRINTF:%.*]] = call i32 (i8*, i64, i8*, ...) @snprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 60, i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) +; CHECK-NEXT: [[SNPRINTF:%.*]] = call i32 (ptr, i64, ptr, ...) @snprintf(ptr nonnull dereferenceable(1) @a, i64 60, ptr nonnull @b) ; CHECK-NEXT: ret i32 [[SNPRINTF]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i32 (i8*, i64, i32, i64, i8*, ...) @__snprintf_chk(i8* %dst, i64 60, i32 0, i64 -1, i8* %fmt) + %ret = call i32 (ptr, i64, i32, i64, ptr, ...) @__snprintf_chk(ptr @a, i64 60, i32 0, i64 -1, ptr @b) ret i32 %ret } define i32 @test_not_snprintf() { ; CHECK-LABEL: @test_not_snprintf( -; CHECK-NEXT: [[RET:%.*]] = call i32 (i8*, i64, i32, i64, i8*, ...) @__snprintf_chk(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 60, i32 0, i64 59, i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) -; CHECK-NEXT: [[IGN:%.*]] = call i32 (i8*, i64, i32, i64, i8*, ...) @__snprintf_chk(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 60, i32 1, i64 -1, i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) +; CHECK-NEXT: [[RET:%.*]] = call i32 (ptr, i64, i32, i64, ptr, ...) @__snprintf_chk(ptr nonnull @a, i64 60, i32 0, i64 59, ptr nonnull @b) +; CHECK-NEXT: [[IGN:%.*]] = call i32 (ptr, i64, i32, i64, ptr, ...) @__snprintf_chk(ptr nonnull @a, i64 60, i32 1, i64 -1, ptr nonnull @b) ; CHECK-NEXT: ret i32 [[RET]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i32 (i8*, i64, i32, i64, i8*, ...) @__snprintf_chk(i8* %dst, i64 60, i32 0, i64 59, i8* %fmt) - %ign = call i32 (i8*, i64, i32, i64, i8*, ...) @__snprintf_chk(i8* %dst, i64 60, i32 1, i64 -1, i8* %fmt) + %ret = call i32 (ptr, i64, i32, i64, ptr, ...) @__snprintf_chk(ptr @a, i64 60, i32 0, i64 59, ptr @b) + %ign = call i32 (ptr, i64, i32, i64, ptr, ...) @__snprintf_chk(ptr @a, i64 60, i32 1, i64 -1, ptr @b) ret i32 %ret } define i32 @test_snprintf_tail() { ; CHECK-LABEL: @test_snprintf_tail( -; CHECK-NEXT: [[SNPRINTF:%.*]] = tail call i32 (i8*, i64, i8*, ...) @snprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 60, i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) +; CHECK-NEXT: [[SNPRINTF:%.*]] = tail call i32 (ptr, i64, ptr, ...) 
@snprintf(ptr nonnull dereferenceable(1) @a, i64 60, ptr nonnull @b) ; CHECK-NEXT: ret i32 [[SNPRINTF]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = tail call i32 (i8*, i64, i32, i64, i8*, ...) @__snprintf_chk(i8* %dst, i64 60, i32 0, i64 -1, i8* %fmt) + %ret = tail call i32 (ptr, i64, i32, i64, ptr, ...) @__snprintf_chk(ptr @a, i64 60, i32 0, i64 -1, ptr @b) ret i32 %ret } define i32 @test_sprintf() { ; CHECK-LABEL: @test_sprintf( -; CHECK-NEXT: [[SPRINTF:%.*]] = call i32 (i8*, i8*, ...) @sprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) +; CHECK-NEXT: [[SPRINTF:%.*]] = call i32 (ptr, ptr, ...) @sprintf(ptr nonnull dereferenceable(1) @a, ptr nonnull dereferenceable(1) @b) ; CHECK-NEXT: ret i32 [[SPRINTF]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk(i8* %dst, i32 0, i64 -1, i8* %fmt) + %ret = call i32 (ptr, i32, i64, ptr, ...) @__sprintf_chk(ptr @a, i32 0, i64 -1, ptr @b) ret i32 %ret } define i32 @test_not_sprintf() { ; CHECK-LABEL: @test_not_sprintf( -; CHECK-NEXT: [[RET:%.*]] = call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i32 0, i64 59, i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) -; CHECK-NEXT: [[IGNORED:%.*]] = call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i32 1, i64 -1, i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) +; CHECK-NEXT: [[RET:%.*]] = call i32 (ptr, i32, i64, ptr, ...) @__sprintf_chk(ptr nonnull @a, i32 0, i64 59, ptr nonnull @b) +; CHECK-NEXT: [[IGNORED:%.*]] = call i32 (ptr, i32, i64, ptr, ...) @__sprintf_chk(ptr nonnull @a, i32 1, i64 -1, ptr nonnull @b) ; CHECK-NEXT: ret i32 [[RET]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk(i8* %dst, i32 0, i64 59, i8* %fmt) - %ignored = call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk(i8* %dst, i32 1, i64 -1, i8* %fmt) + %ret = call i32 (ptr, i32, i64, ptr, ...) @__sprintf_chk(ptr @a, i32 0, i64 59, ptr @b) + %ignored = call i32 (ptr, i32, i64, ptr, ...) @__sprintf_chk(ptr @a, i32 1, i64 -1, ptr @b) ret i32 %ret } define i32 @test_sprintf_tail() { ; CHECK-LABEL: @test_sprintf_tail( -; CHECK-NEXT: [[SPRINTF:%.*]] = tail call i32 (i8*, i8*, ...) @sprintf(i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) +; CHECK-NEXT: [[SPRINTF:%.*]] = tail call i32 (ptr, ptr, ...) @sprintf(ptr nonnull dereferenceable(1) @a, ptr nonnull dereferenceable(1) @b) ; CHECK-NEXT: ret i32 [[SPRINTF]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %fmt = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = tail call i32 (i8*, i32, i64, i8*, ...) @__sprintf_chk(i8* %dst, i32 0, i64 -1, i8* %fmt) + %ret = tail call i32 (ptr, i32, i64, ptr, ...) 
@__sprintf_chk(ptr @a, i32 0, i64 -1, ptr @b) ret i32 %ret } -define i8* @test_strcat() { +define ptr @test_strcat() { ; CHECK-LABEL: @test_strcat( -; CHECK-NEXT: [[STRCAT:%.*]] = call i8* @strcat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) -; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0) +; CHECK-NEXT: [[STRCAT:%.*]] = call ptr @strcat(ptr noundef nonnull dereferenceable(1) @a, ptr noundef nonnull dereferenceable(1) @b) +; CHECK-NEXT: ret ptr @a ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i8* @__strcat_chk(i8* %dst, i8* %src, i64 -1) - ret i8* %ret + %ret = call ptr @__strcat_chk(ptr @a, ptr @b, i64 -1) + ret ptr %ret } -define i8* @test_not_strcat() { +define ptr @test_not_strcat() { ; CHECK-LABEL: @test_not_strcat( -; CHECK-NEXT: [[RET:%.*]] = call i8* @__strcat_chk(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 0) -; CHECK-NEXT: ret i8* [[RET]] +; CHECK-NEXT: [[RET:%.*]] = call ptr @__strcat_chk(ptr nonnull @a, ptr nonnull @b, i64 0) +; CHECK-NEXT: ret ptr [[RET]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i8* @__strcat_chk(i8* %dst, i8* %src, i64 0) - ret i8* %ret + %ret = call ptr @__strcat_chk(ptr @a, ptr @b, i64 0) + ret ptr %ret } -define i8* @test_strcat_tail() { +define ptr @test_strcat_tail() { ; CHECK-LABEL: @test_strcat_tail( -; CHECK-NEXT: [[STRCAT:%.*]] = tail call i8* @strcat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0)) -; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0) +; CHECK-NEXT: [[STRCAT:%.*]] = tail call ptr @strcat(ptr noundef nonnull dereferenceable(1) @a, ptr noundef nonnull dereferenceable(1) @b) +; CHECK-NEXT: ret ptr @a ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = tail call i8* @__strcat_chk(i8* %dst, i8* %src, i64 -1) - ret i8* %ret + %ret = tail call ptr @__strcat_chk(ptr @a, ptr @b, i64 -1) + ret ptr %ret } define i64 @test_strlcat() { ; CHECK-LABEL: @test_strlcat( -; CHECK-NEXT: [[STRLCAT:%.*]] = call i64 @strlcat(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) +; CHECK-NEXT: [[STRLCAT:%.*]] = call i64 @strlcat(ptr nonnull @a, ptr nonnull @b, i64 22) ; CHECK-NEXT: ret i64 [[STRLCAT]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i64 @__strlcat_chk(i8* %dst, i8* %src, i64 22, i64 -1) + %ret = call i64 @__strlcat_chk(ptr @a, ptr @b, i64 22, i64 -1) ret i64 %ret } define i64 @test_not_strlcat() { ; CHECK-LABEL: @test_not_strlcat( -; CHECK-NEXT: [[RET:%.*]] = call i64 @__strlcat_chk(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull getelementptr inbounds ([60 x i8], 
[60 x i8]* @b, i64 0, i64 0), i64 22, i64 0) +; CHECK-NEXT: [[RET:%.*]] = call i64 @__strlcat_chk(ptr nonnull @a, ptr nonnull @b, i64 22, i64 0) ; CHECK-NEXT: ret i64 [[RET]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i64 @__strlcat_chk(i8* %dst, i8* %src, i64 22, i64 0) + %ret = call i64 @__strlcat_chk(ptr @a, ptr @b, i64 22, i64 0) ret i64 %ret } define i64 @test_strlcat_tail() { ; CHECK-LABEL: @test_strlcat_tail( -; CHECK-NEXT: [[STRLCAT:%.*]] = tail call i64 @strlcat(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) +; CHECK-NEXT: [[STRLCAT:%.*]] = tail call i64 @strlcat(ptr nonnull @a, ptr nonnull @b, i64 22) ; CHECK-NEXT: ret i64 [[STRLCAT]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = tail call i64 @__strlcat_chk(i8* %dst, i8* %src, i64 22, i64 -1) + %ret = tail call i64 @__strlcat_chk(ptr @a, ptr @b, i64 22, i64 -1) ret i64 %ret } -define i8* @test_strncat() { +define ptr @test_strncat() { ; CHECK-LABEL: @test_strncat( -; CHECK-NEXT: [[STRNCAT:%.*]] = call i8* @strncat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) -; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0) +; CHECK-NEXT: [[STRNCAT:%.*]] = call ptr @strncat(ptr noundef nonnull dereferenceable(1) @a, ptr noundef nonnull dereferenceable(1) @b, i64 22) +; CHECK-NEXT: ret ptr @a ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i8* @__strncat_chk(i8* %dst, i8* %src, i64 22, i64 -1) - ret i8* %ret + %ret = call ptr @__strncat_chk(ptr @a, ptr @b, i64 22, i64 -1) + ret ptr %ret } -define i8* @test_not_strncat() { +define ptr @test_not_strncat() { ; CHECK-LABEL: @test_not_strncat( -; CHECK-NEXT: [[RET:%.*]] = call i8* @__strncat_chk(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22, i64 3) -; CHECK-NEXT: ret i8* [[RET]] +; CHECK-NEXT: [[RET:%.*]] = call ptr @__strncat_chk(ptr nonnull @a, ptr nonnull @b, i64 22, i64 3) +; CHECK-NEXT: ret ptr [[RET]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i8* @__strncat_chk(i8* %dst, i8* %src, i64 22, i64 3) - ret i8* %ret + %ret = call ptr @__strncat_chk(ptr @a, ptr @b, i64 22, i64 3) + ret ptr %ret } -define i8* @test_strncat_tail() { +define ptr @test_strncat_tail() { ; CHECK-LABEL: @test_strncat_tail( -; CHECK-NEXT: [[STRNCAT:%.*]] = tail call i8* @strncat(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) -; CHECK-NEXT: ret i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0) +; CHECK-NEXT: [[STRNCAT:%.*]] = tail call ptr @strncat(ptr noundef nonnull dereferenceable(1) @a, ptr noundef nonnull dereferenceable(1) @b, i64 22) +; CHECK-NEXT: ret ptr @a ; - %dst 
= getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = tail call i8* @__strncat_chk(i8* %dst, i8* %src, i64 22, i64 -1) - ret i8* %ret + %ret = tail call ptr @__strncat_chk(ptr @a, ptr @b, i64 22, i64 -1) + ret ptr %ret } define i64 @test_strlcpy() { ; CHECK-LABEL: @test_strlcpy( -; CHECK-NEXT: [[STRLCPY:%.*]] = call i64 @strlcpy(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) +; CHECK-NEXT: [[STRLCPY:%.*]] = call i64 @strlcpy(ptr noundef nonnull dereferenceable(1) @a, ptr noundef nonnull dereferenceable(1) @b, i64 22) ; CHECK-NEXT: ret i64 [[STRLCPY]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i64 @__strlcpy_chk(i8* %dst, i8* %src, i64 22, i64 -1) + %ret = call i64 @__strlcpy_chk(ptr @a, ptr @b, i64 22, i64 -1) ret i64 %ret } define i64 @test_not_strlcpy() { ; CHECK-LABEL: @test_not_strlcpy( -; CHECK-NEXT: [[RET:%.*]] = call i64 @__strlcpy_chk(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22, i64 2) +; CHECK-NEXT: [[RET:%.*]] = call i64 @__strlcpy_chk(ptr nonnull @a, ptr nonnull @b, i64 22, i64 2) ; CHECK-NEXT: ret i64 [[RET]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i64 @__strlcpy_chk(i8* %dst, i8* %src, i64 22, i64 2) + %ret = call i64 @__strlcpy_chk(ptr @a, ptr @b, i64 22, i64 2) ret i64 %ret } define i64 @test_strlcpy_tail() { ; CHECK-LABEL: @test_strlcpy_tail( -; CHECK-NEXT: [[STRLCPY:%.*]] = tail call i64 @strlcpy(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), i64 22) +; CHECK-NEXT: [[STRLCPY:%.*]] = tail call i64 @strlcpy(ptr noundef nonnull dereferenceable(1) @a, ptr noundef nonnull dereferenceable(1) @b, i64 22) ; CHECK-NEXT: ret i64 [[STRLCPY]] ; - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = tail call i64 @__strlcpy_chk(i8* %dst, i8* %src, i64 22, i64 -1) + %ret = tail call i64 @__strlcpy_chk(ptr @a, ptr @b, i64 22, i64 -1) ret i64 %ret } define i32 @test_vsnprintf() { ; CHECK-LABEL: @test_vsnprintf( -; CHECK-NEXT: [[VSNPRINTF:%.*]] = call i32 @vsnprintf(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 4, i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) +; CHECK-NEXT: [[VSNPRINTF:%.*]] = call i32 @vsnprintf(ptr nonnull @a, i64 4, ptr nonnull @b, ptr null) ; CHECK-NEXT: ret i32 [[VSNPRINTF]] ; ; ret i32 - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i32 @__vsnprintf_chk(i8* %dst, i64 4, i32 0, i64 -1, i8* %src, %struct.__va_list_tag* null) + %ret = call i32 @__vsnprintf_chk(ptr @a, i64 4, i32 0, i64 -1, ptr @b, ptr null) ret i32 %ret } define i32 @test_not_vsnprintf() { ; CHECK-LABEL: @test_not_vsnprintf( -; CHECK-NEXT: [[RET:%.*]] = 
call i32 @__vsnprintf_chk(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 4, i32 0, i64 3, i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) -; CHECK-NEXT: [[IGN:%.*]] = call i32 @__vsnprintf_chk(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 4, i32 1, i64 -1, i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) +; CHECK-NEXT: [[RET:%.*]] = call i32 @__vsnprintf_chk(ptr nonnull @a, i64 4, i32 0, i64 3, ptr nonnull @b, ptr null) +; CHECK-NEXT: [[IGN:%.*]] = call i32 @__vsnprintf_chk(ptr nonnull @a, i64 4, i32 1, i64 -1, ptr nonnull @b, ptr null) ; CHECK-NEXT: ret i32 [[RET]] ; ; ret i32 - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i32 @__vsnprintf_chk(i8* %dst, i64 4, i32 0, i64 3, i8* %src, %struct.__va_list_tag* null) - %ign = call i32 @__vsnprintf_chk(i8* %dst, i64 4, i32 1, i64 -1, i8* %src, %struct.__va_list_tag* null) + %ret = call i32 @__vsnprintf_chk(ptr @a, i64 4, i32 0, i64 3, ptr @b, ptr null) + %ign = call i32 @__vsnprintf_chk(ptr @a, i64 4, i32 1, i64 -1, ptr @b, ptr null) ret i32 %ret } define i32 @test_vsnprintf_tail() { ; CHECK-LABEL: @test_vsnprintf_tail( -; CHECK-NEXT: [[VSNPRINTF:%.*]] = tail call i32 @vsnprintf(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i64 4, i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) +; CHECK-NEXT: [[VSNPRINTF:%.*]] = tail call i32 @vsnprintf(ptr nonnull @a, i64 4, ptr nonnull @b, ptr null) ; CHECK-NEXT: ret i32 [[VSNPRINTF]] ; ; ret i32 - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = tail call i32 @__vsnprintf_chk(i8* %dst, i64 4, i32 0, i64 -1, i8* %src, %struct.__va_list_tag* null) + %ret = tail call i32 @__vsnprintf_chk(ptr @a, i64 4, i32 0, i64 -1, ptr @b, ptr null) ret i32 %ret } define i32 @test_vsprintf() { ; CHECK-LABEL: @test_vsprintf( -; CHECK-NEXT: [[VSPRINTF:%.*]] = call i32 @vsprintf(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) +; CHECK-NEXT: [[VSPRINTF:%.*]] = call i32 @vsprintf(ptr nonnull @a, ptr nonnull @b, ptr null) ; CHECK-NEXT: ret i32 [[VSPRINTF]] ; ; ret i32 - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i32 @__vsprintf_chk(i8* %dst, i32 0, i64 -1, i8* %src, %struct.__va_list_tag* null) + %ret = call i32 @__vsprintf_chk(ptr @a, i32 0, i64 -1, ptr @b, ptr null) ret i32 %ret } define i32 @test_not_vsprintf() { ; CHECK-LABEL: @test_not_vsprintf( -; CHECK-NEXT: [[RET:%.*]] = call i32 @__vsprintf_chk(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i32 0, i64 3, i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) -; CHECK-NEXT: [[IGN:%.*]] = call i32 @__vsprintf_chk(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i32 1, i64 -1, i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) +; CHECK-NEXT: [[RET:%.*]] = call i32 @__vsprintf_chk(ptr nonnull @a, i32 
0, i64 3, ptr nonnull @b, ptr null) +; CHECK-NEXT: [[IGN:%.*]] = call i32 @__vsprintf_chk(ptr nonnull @a, i32 1, i64 -1, ptr nonnull @b, ptr null) ; CHECK-NEXT: ret i32 [[RET]] ; ; ret i32 - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = call i32 @__vsprintf_chk(i8* %dst, i32 0, i64 3, i8* %src, %struct.__va_list_tag* null) - %ign = call i32 @__vsprintf_chk(i8* %dst, i32 1, i64 -1, i8* %src, %struct.__va_list_tag* null) + %ret = call i32 @__vsprintf_chk(ptr @a, i32 0, i64 3, ptr @b, ptr null) + %ign = call i32 @__vsprintf_chk(ptr @a, i32 1, i64 -1, ptr @b, ptr null) ret i32 %ret } define i32 @test_vsprintf_tail() { ; CHECK-LABEL: @test_vsprintf_tail( -; CHECK-NEXT: [[VSPRINTF:%.*]] = tail call i32 @vsprintf(i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @a, i64 0, i64 0), i8* nonnull getelementptr inbounds ([60 x i8], [60 x i8]* @b, i64 0, i64 0), %struct.__va_list_tag* null) +; CHECK-NEXT: [[VSPRINTF:%.*]] = tail call i32 @vsprintf(ptr nonnull @a, ptr nonnull @b, ptr null) ; CHECK-NEXT: ret i32 [[VSPRINTF]] ; ; ret i32 - %dst = getelementptr inbounds [60 x i8], [60 x i8]* @a, i32 0, i32 0 - %src = getelementptr inbounds [60 x i8], [60 x i8]* @b, i32 0, i32 0 - %ret = tail call i32 @__vsprintf_chk(i8* %dst, i32 0, i64 -1, i8* %src, %struct.__va_list_tag* null) + %ret = tail call i32 @__vsprintf_chk(ptr @a, i32 0, i64 -1, ptr @b, ptr null) ret i32 %ret } -declare i8* @__mempcpy_chk(i8*, i8*, i64, i64) -declare i8* @__memccpy_chk(i8*, i8*, i32, i64, i64) -declare i32 @__snprintf_chk(i8*, i64, i32, i64, i8*, ...) -declare i32 @__sprintf_chk(i8*, i32, i64, i8*, ...) -declare i8* @__strcat_chk(i8*, i8*, i64) -declare i64 @__strlcat_chk(i8*, i8*, i64, i64) -declare i8* @__strncat_chk(i8*, i8*, i64, i64) -declare i64 @__strlcpy_chk(i8*, i8*, i64, i64) -declare i32 @__vsnprintf_chk(i8*, i64, i32, i64, i8*, %struct.__va_list_tag*) -declare i32 @__vsprintf_chk(i8*, i32, i64, i8*, %struct.__va_list_tag*) +declare ptr @__mempcpy_chk(ptr, ptr, i64, i64) +declare ptr @__memccpy_chk(ptr, ptr, i32, i64, i64) +declare i32 @__snprintf_chk(ptr, i64, i32, i64, ptr, ...) +declare i32 @__sprintf_chk(ptr, i32, i64, ptr, ...) +declare ptr @__strcat_chk(ptr, ptr, i64) +declare i64 @__strlcat_chk(ptr, ptr, i64, i64) +declare ptr @__strncat_chk(ptr, ptr, i64, i64) +declare i64 @__strlcpy_chk(ptr, ptr, i64, i64) +declare i32 @__vsnprintf_chk(ptr, i64, i32, i64, ptr, ptr) +declare i32 @__vsprintf_chk(ptr, i32, i64, ptr, ptr) diff --git a/llvm/test/Transforms/InstCombine/fputs-1.ll b/llvm/test/Transforms/InstCombine/fputs-1.ll index f5f23dab4aaa9..3907532d6b564 100644 --- a/llvm/test/Transforms/InstCombine/fputs-1.ll +++ b/llvm/test/Transforms/InstCombine/fputs-1.ll @@ -11,37 +11,34 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 @A = constant [2 x i8] c"A\00" @hello = constant [7 x i8] c"hello\0A\00" -declare i32 @fputs(i8*, %FILE*) +declare i32 @fputs(ptr, ptr) ; Check fputs(str, fp) --> fwrite(str, strlen(s), 1, fp). -define void @test_simplify1(%FILE* %fp) { +define void @test_simplify1(ptr %fp) { ; CHECK-LABEL: @test_simplify1( ; CHECK-NEXT: ret void ; - %str = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0 - call i32 @fputs(i8* %str, %FILE* %fp) + call i32 @fputs(ptr @empty, ptr %fp) ret void } ; NOTE: The fwrite simplifier simplifies this further to fputc. 
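(A minimal C sketch of that fold chain, illustrative only and not part of the patch: for a string whose length is known at compile time, the three calls below are observably equivalent, which is what lets the simplifier pick the cheapest form.)

#include <stdio.h>

/* Equivalent ways to write the one-character string "A"; the
   simplifier rewrites the first form into the last. */
void demo_fputs(FILE *fp) {
  fputs("A", fp);         /* original library call           */
  fwrite("A", 1, 1, fp);  /* fputs -> fwrite(str, len, 1)    */
  fputc('A', fp);         /* one-byte fwrite -> fputc        */
}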
-define void @test_simplify2(%FILE* %fp) { +define void @test_simplify2(ptr %fp) { ; CHECK-LABEL: @test_simplify2( -; CHECK-NEXT: [[FPUTC:%.*]] = call i32 @fputc(i32 65, %FILE* [[FP:%.*]]) +; CHECK-NEXT: [[FPUTC:%.*]] = call i32 @fputc(i32 65, ptr [[FP:%.*]]) ; CHECK-NEXT: ret void ; - %str = getelementptr [2 x i8], [2 x i8]* @A, i32 0, i32 0 - call i32 @fputs(i8* %str, %FILE* %fp) + call i32 @fputs(ptr @A, ptr %fp) ret void } -define void @test_simplify3(%FILE* %fp) { +define void @test_simplify3(ptr %fp) { ; CHECK-LABEL: @test_simplify3( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @fwrite(i8* nonnull getelementptr inbounds ([7 x i8], [7 x i8]* @hello, i32 0, i32 0), i32 6, i32 1, %FILE* [[FP:%.*]]) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @fwrite(ptr nonnull @hello, i32 6, i32 1, ptr [[FP:%.*]]) ; CHECK-NEXT: ret void ; - %str = getelementptr [7 x i8], [7 x i8]* @hello, i32 0, i32 0 - call i32 @fputs(i8* %str, %FILE* %fp) + call i32 @fputs(ptr @hello, ptr %fp) ret void } diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll index 4d8c1f63ea77e..b1b77bf2b89fd 100644 --- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll @@ -300,7 +300,7 @@ entry: %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer %broadcast.value = insertelement poison, i16 %val, i32 0 %broadcast.splatvalue = shufflevector %broadcast.value, poison, zeroinitializer - call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( %broadcast.splatvalue, %broadcast.splat, i32 2, shufflevector ( insertelement ( zeroinitializer , i1 true, i32 0), zeroinitializer, zeroinitializer)) + call void @llvm.masked.scatter.nxv4i16.nxv4p0( %broadcast.splatvalue, %broadcast.splat, i32 2, shufflevector ( insertelement ( zeroinitializer , i1 true, i32 0), zeroinitializer, zeroinitializer)) ret void } @@ -336,7 +336,7 @@ entry: %broadcast.splatinsert = insertelement poison, ptr %dst, i32 0 %broadcast.splat = shufflevector %broadcast.splatinsert, poison, zeroinitializer %wide.load = load , ptr %src, align 2 - call void @llvm.masked.scatter.nxv4i16.nxv4p0i16( %wide.load, %broadcast.splat, i32 2, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer)) + call void @llvm.masked.scatter.nxv4i16.nxv4p0( %wide.load, %broadcast.splat, i32 2, shufflevector ( insertelement ( poison, i1 true, i32 0), poison, zeroinitializer)) ret void } @@ -375,7 +375,7 @@ define void @negative_scatter_v4i16_no_uniform_vals_no_uniform_ptrs_all_active_m ; Function Attrs: declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32 immarg, <4 x i1>) -declare void @llvm.masked.scatter.nxv4i16.nxv4p0i16(, , i32 immarg, ) +declare void @llvm.masked.scatter.nxv4i16.nxv4p0(, , i32 immarg, ) ; Test gathers that can be simplified to scalar load + splat diff --git a/llvm/test/Transforms/InstCombine/memccpy.ll b/llvm/test/Transforms/InstCombine/memccpy.ll index c6e4005b629de..f3714a99f320c 100644 --- a/llvm/test/Transforms/InstCombine/memccpy.ll +++ b/llvm/test/Transforms/InstCombine/memccpy.ll @@ -6,265 +6,263 @@ @StopCharAfterNulTerminator = private constant [12 x i8] c"helloworld\00x", align 1 @StringWithEOF = constant [14 x i8] c"helloworld\FFab\00", align 1 -declare i8* @memccpy(i8*, i8*, i32, i64) +declare ptr @memccpy(ptr, ptr, i32, i64) -define i8* @memccpy_to_memcpy(i8* %dst) { +define ptr @memccpy_to_memcpy(ptr %dst) { ; CHECK-LABEL: @memccpy_to_memcpy( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* 
[[DST:%.*]] to i64* -; CHECK-NEXT: store i64 8245940763182785896, i64* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 8 -; CHECK-NEXT: ret i8* [[TMP2]] +; CHECK-NEXT: store i64 8245940763182785896, ptr [[DST:%.*]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 8 +; CHECK-NEXT: ret ptr [[TMP2]] ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 12) ; 114 is 'r' - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @hello, i32 114, i64 12) ; 114 is 'r' + ret ptr %call } -define i8* @memccpy_to_memcpy2(i8* %dst) { +define ptr @memccpy_to_memcpy2(ptr %dst) { ; CHECK-LABEL: @memccpy_to_memcpy2( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST:%.*]] to i64* -; CHECK-NEXT: store i64 8245940763182785896, i64* [[TMP1]], align 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 8 -; CHECK-NEXT: ret i8* [[TMP2]] +; CHECK-NEXT: store i64 8245940763182785896, ptr [[DST:%.*]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 8 +; CHECK-NEXT: ret ptr [[TMP2]] ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 8); ; 114 is 'r' - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @hello, i32 114, i64 8); ; 114 is 'r' + ret ptr %call } -define void @memccpy_to_memcpy3(i8* %dst) { +define void @memccpy_to_memcpy3(ptr %dst) { ; CHECK-LABEL: @memccpy_to_memcpy3( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 5, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(5) @hello, i64 5, i1 false) ; CHECK-NEXT: ret void ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 111, i64 10) ; 111 is 'o' + %call = call ptr @memccpy(ptr %dst, ptr @hello, i32 111, i64 10) ; 111 is 'o' ret void } -define void @memccpy_to_memcpy3_tail(i8* %dst) { +define void @memccpy_to_memcpy3_tail(ptr %dst) { ; CHECK-LABEL: @memccpy_to_memcpy3_tail( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 5, i1 false) +; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(5) @hello, i64 5, i1 false) ; CHECK-NEXT: ret void ; - %call = tail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 111, i64 10) ; 111 is 'o' + %call = tail call ptr @memccpy(ptr %dst, ptr @hello, i32 111, i64 10) ; 111 is 'o' ret void } -define i8* @memccpy_to_memcpy3_musttail(i8* %dst, i8* %x, i32 %y, i64 %z) { +define ptr @memccpy_to_memcpy3_musttail(ptr %dst, ptr %x, i32 %y, i64 %z) { ; CHECK-LABEL: @memccpy_to_memcpy3_musttail( -; CHECK-NEXT: [[CALL:%.*]] = musttail call i8* @memccpy(i8* [[DST:%.*]], i8* nonnull getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 111, i64 10) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = musttail call ptr @memccpy(ptr 
[[DST:%.*]], ptr nonnull @hello, i32 111, i64 10) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 111, i64 10) ; 111 is 'o' - ret i8* %call + %call = musttail call ptr @memccpy(ptr %dst, ptr @hello, i32 111, i64 10) ; 111 is 'o' + ret ptr %call } -define void @memccpy_to_memcpy4(i8* %dst) { +define void @memccpy_to_memcpy4(ptr %dst) { ; CHECK-LABEL: @memccpy_to_memcpy4( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(11) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(11) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 11, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(11) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(11) @hello, i64 11, i1 false) ; CHECK-NEXT: ret void ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 0, i64 12) + %call = call ptr @memccpy(ptr %dst, ptr @hello, i32 0, i64 12) ret void } -define i8* @memccpy_to_memcpy5(i8* %dst) { +define ptr @memccpy_to_memcpy5(ptr %dst) { ; CHECK-LABEL: @memccpy_to_memcpy5( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(7) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(7) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 7, i1 false) -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(7) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(7) @hello, i64 7, i1 false) +; CHECK-NEXT: ret ptr null ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 7) - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @hello, i32 114, i64 7) + ret ptr %call } -define i8* @memccpy_to_memcpy5_tail(i8* %dst) { +define ptr @memccpy_to_memcpy5_tail(ptr %dst) { ; CHECK-LABEL: @memccpy_to_memcpy5_tail( -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(7) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(7) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 7, i1 false) -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(7) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(7) @hello, i64 7, i1 false) +; CHECK-NEXT: ret ptr null ; - %call = tail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 7) - ret i8* %call + %call = tail call ptr @memccpy(ptr %dst, ptr @hello, i32 114, i64 7) + ret ptr %call } -define i8* @memccpy_to_memcpy5_musttail(i8* %dst, i8* %x, i32 %y, i64 %z) { +define ptr @memccpy_to_memcpy5_musttail(ptr %dst, ptr %x, i32 %y, i64 %z) { ; CHECK-LABEL: @memccpy_to_memcpy5_musttail( -; CHECK-NEXT: [[CALL:%.*]] = musttail call i8* @memccpy(i8* [[DST:%.*]], i8* nonnull getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 7) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = musttail call ptr @memccpy(ptr [[DST:%.*]], ptr nonnull @hello, i32 114, i64 7) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 7) - ret i8* %call + %call = 
musttail call ptr @memccpy(ptr %dst, ptr @hello, i32 114, i64 7) + ret ptr %call } -define i8* @memccpy_to_memcpy6(i8* %dst) { +define ptr @memccpy_to_memcpy6(ptr %dst) { ; CHECK-LABEL: @memccpy_to_memcpy6( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(6) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(6) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 6, i1 false) -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(6) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(6) @hello, i64 6, i1 false) +; CHECK-NEXT: ret ptr null ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 6); - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @hello, i32 114, i64 6); + ret ptr %call } -define i8* @memccpy_to_memcpy7(i8* %dst) { +define ptr @memccpy_to_memcpy7(ptr %dst) { ; CHECK-LABEL: @memccpy_to_memcpy7( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 5, i1 false) -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(5) @hello, i64 5, i1 false) +; CHECK-NEXT: ret ptr null ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 115, i64 5) ; 115 is 's' - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @hello, i32 115, i64 5) ; 115 is 's' + ret ptr %call } -define i8* @memccpy_to_memcpy8(i8* %dst) { +define ptr @memccpy_to_memcpy8(ptr %dst) { ; CHECK-LABEL: @memccpy_to_memcpy8( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(11) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(11) getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i64 11, i1 false) -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(11) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(11) @hello, i64 11, i1 false) +; CHECK-NEXT: ret ptr null ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 115, i64 11) ; 115 is 's' - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @hello, i32 115, i64 11) ; 115 is 's' + ret ptr %call } -define i8* @memccpy_to_memcpy9(i8* %dst, i64 %n) { +define ptr @memccpy_to_memcpy9(ptr %dst, i64 %n) { ; CHECK-LABEL: @memccpy_to_memcpy9( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(12) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(12) getelementptr inbounds ([12 x i8], [12 x i8]* @StopCharAfterNulTerminator, i64 0, i64 0), i64 12, i1 false) -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 12 -; CHECK-NEXT: ret i8* [[TMP1]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(12) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(12) @StopCharAfterNulTerminator, i64 12, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 12 +; CHECK-NEXT: ret ptr [[TMP1]] ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([12 x 
i8], [12 x i8]* @StopCharAfterNulTerminator, i64 0, i64 0), i32 120, i64 15) ; 120 is 'x' - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @StopCharAfterNulTerminator, i32 120, i64 15) ; 120 is 'x' + ret ptr %call } -define i8* @memccpy_to_memcpy10(i8* %dst, i64 %n) { +define ptr @memccpy_to_memcpy10(ptr %dst, i64 %n) { ; CHECK-LABEL: @memccpy_to_memcpy10( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(11) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(11) getelementptr inbounds ([14 x i8], [14 x i8]* @StringWithEOF, i64 0, i64 0), i64 11, i1 false) -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 11 -; CHECK-NEXT: ret i8* [[TMP1]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(11) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(11) @StringWithEOF, i64 11, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 11 +; CHECK-NEXT: ret ptr [[TMP1]] ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @StringWithEOF, i64 0, i64 0), i32 255, i64 15) - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @StringWithEOF, i32 255, i64 15) + ret ptr %call } -define i8* @memccpy_to_memcpy11(i8* %dst, i64 %n) { +define ptr @memccpy_to_memcpy11(ptr %dst, i64 %n) { ; CHECK-LABEL: @memccpy_to_memcpy11( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(11) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(11) getelementptr inbounds ([14 x i8], [14 x i8]* @StringWithEOF, i64 0, i64 0), i64 11, i1 false) -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 11 -; CHECK-NEXT: ret i8* [[TMP1]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(11) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(11) @StringWithEOF, i64 11, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 11 +; CHECK-NEXT: ret ptr [[TMP1]] ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @StringWithEOF, i64 0, i64 0), i32 -1, i64 15) - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @StringWithEOF, i32 -1, i64 15) + ret ptr %call } -define i8* @memccpy_to_memcpy12(i8* %dst, i64 %n) { +define ptr @memccpy_to_memcpy12(ptr %dst, i64 %n) { ; CHECK-LABEL: @memccpy_to_memcpy12( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(11) [[DST:%.*]], i8* noundef nonnull align 1 dereferenceable(11) getelementptr inbounds ([14 x i8], [14 x i8]* @StringWithEOF, i64 0, i64 0), i64 11, i1 false) -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 11 -; CHECK-NEXT: ret i8* [[TMP1]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(11) [[DST:%.*]], ptr noundef nonnull align 1 dereferenceable(11) @StringWithEOF, i64 11, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 11 +; CHECK-NEXT: ret ptr [[TMP1]] ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([14 x i8], [14 x i8]* @StringWithEOF, i64 0, i64 0), i32 1023, i64 15) - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @StringWithEOF, i32 1023, i64 15) + ret ptr %call } -define i8* @memccpy_to_null(i8* %dst, i8* %src, i32 %c) { +define ptr @memccpy_to_null(ptr %dst, ptr %src, i32 %c) { ; CHECK-LABEL: @memccpy_to_null( -; 
CHECK-NEXT: ret i8* null +; CHECK-NEXT: ret ptr null ; - %call = call i8* @memccpy(i8* %dst, i8* %src, i32 %c, i64 0) - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr %src, i32 %c, i64 0) + ret ptr %call } -define void @memccpy_dst_src_same_retval_unused(i8* %dst, i32 %c, i64 %n) { +define void @memccpy_dst_src_same_retval_unused(ptr %dst, i32 %c, i64 %n) { ; CHECK-LABEL: @memccpy_dst_src_same_retval_unused( ; CHECK-NEXT: ret void ; - %call = call i8* @memccpy(i8* %dst, i8* %dst, i32 %c, i64 %n) + %call = call ptr @memccpy(ptr %dst, ptr %dst, i32 %c, i64 %n) ret void } ; Negative tests -define i8* @unknown_src(i8* %dst, i8* %src) { +define ptr @unknown_src(ptr %dst, ptr %src) { ; CHECK-LABEL: @unknown_src( -; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* [[SRC:%.*]], i32 114, i64 12) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = call ptr @memccpy(ptr [[DST:%.*]], ptr [[SRC:%.*]], i32 114, i64 12) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = call i8* @memccpy(i8* %dst, i8* %src, i32 114, i64 12) - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr %src, i32 114, i64 12) + ret ptr %call } -define i8* @unknown_stop_char(i8* %dst, i32 %c) { +define ptr @unknown_stop_char(ptr %dst, i32 %c) { ; CHECK-LABEL: @unknown_stop_char( -; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* nonnull getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 [[C:%.*]], i64 12) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = call ptr @memccpy(ptr [[DST:%.*]], ptr nonnull @hello, i32 [[C:%.*]], i64 12) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 %c, i64 12) - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @hello, i32 %c, i64 12) + ret ptr %call } -define i8* @unknown_size_n(i8* %dst, i64 %n) { +define ptr @unknown_size_n(ptr %dst, i64 %n) { ; CHECK-LABEL: @unknown_size_n( -; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* nonnull getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 [[N:%.*]]) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = call ptr @memccpy(ptr [[DST:%.*]], ptr nonnull @hello, i32 114, i64 [[N:%.*]]) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 %n) - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @hello, i32 114, i64 %n) + ret ptr %call } -define i8* @no_nul_terminator(i8* %dst, i64 %n) { +define ptr @no_nul_terminator(ptr %dst, i64 %n) { ; CHECK-LABEL: @no_nul_terminator( -; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* nonnull getelementptr inbounds ([12 x i8], [12 x i8]* @StopCharAfterNulTerminator, i64 0, i64 0), i32 120, i64 [[N:%.*]]) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = call ptr @memccpy(ptr [[DST:%.*]], ptr nonnull @StopCharAfterNulTerminator, i32 120, i64 [[N:%.*]]) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([12 x i8], [12 x i8]* @StopCharAfterNulTerminator, i64 0, i64 0), i32 120, i64 %n) ; 120 is 'x' - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @StopCharAfterNulTerminator, i32 120, i64 %n) ; 120 is 'x' + ret ptr %call } -define i8* @possibly_valid_data_after_array(i8* %dst, i64 %n) { +define ptr @possibly_valid_data_after_array(ptr %dst, i64 %n) { ; CHECK-LABEL: 
@possibly_valid_data_after_array( -; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* nonnull getelementptr inbounds ([10 x i8], [10 x i8]* @NoNulTerminator, i64 0, i64 0), i32 115, i64 [[N:%.*]]) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = call ptr @memccpy(ptr [[DST:%.*]], ptr nonnull @NoNulTerminator, i32 115, i64 [[N:%.*]]) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @NoNulTerminator, i64 0, i64 0), i32 115, i64 %n) ; 115 is 's' - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @NoNulTerminator, i32 115, i64 %n) ; 115 is 's' + ret ptr %call } -define i8* @possibly_valid_data_after_array2(i8* %dst, i64 %n) { +define ptr @possibly_valid_data_after_array2(ptr %dst, i64 %n) { ; CHECK-LABEL: @possibly_valid_data_after_array2( -; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* nonnull getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 115, i64 [[N:%.*]]) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = call ptr @memccpy(ptr [[DST:%.*]], ptr nonnull @hello, i32 115, i64 [[N:%.*]]) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 115, i64 %n) ; 115 is 's' - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @hello, i32 115, i64 %n) ; 115 is 's' + ret ptr %call } -define i8* @possibly_valid_data_after_array3(i8* %dst) { +define ptr @possibly_valid_data_after_array3(ptr %dst) { ; CHECK-LABEL: @possibly_valid_data_after_array3( -; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* nonnull getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 115, i64 12) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = call ptr @memccpy(ptr [[DST:%.*]], ptr nonnull @hello, i32 115, i64 12) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 115, i64 12) ; 115 is 's' - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr @hello, i32 115, i64 12) ; 115 is 's' + ret ptr %call } -define i8* @memccpy_dst_src_same_retval_used(i8* %dst, i32 %c, i64 %n) { +define ptr @memccpy_dst_src_same_retval_used(ptr %dst, i32 %c, i64 %n) { ; CHECK-LABEL: @memccpy_dst_src_same_retval_used( -; CHECK-NEXT: [[CALL:%.*]] = call i8* @memccpy(i8* [[DST:%.*]], i8* [[DST]], i32 [[C:%.*]], i64 [[N:%.*]]) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = call ptr @memccpy(ptr [[DST:%.*]], ptr [[DST]], i32 [[C:%.*]], i64 [[N:%.*]]) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = call i8* @memccpy(i8* %dst, i8* %dst, i32 %c, i64 %n) - ret i8* %call + %call = call ptr @memccpy(ptr %dst, ptr %dst, i32 %c, i64 %n) + ret ptr %call } -define i8* @memccpy_to_memcpy_musttail(i8* %dst, i8* %x, i32 %y, i64 %z) { +define ptr @memccpy_to_memcpy_musttail(ptr %dst, ptr %x, i32 %y, i64 %z) { ; CHECK-LABEL: @memccpy_to_memcpy_musttail( -; CHECK-NEXT: [[CALL:%.*]] = musttail call i8* @memccpy(i8* [[DST:%.*]], i8* nonnull getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 12) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = musttail call ptr @memccpy(ptr [[DST:%.*]], ptr nonnull @hello, i32 114, i64 12) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 12) ; 114 is 'r' - ret i8* 
%call + %call = musttail call ptr @memccpy(ptr %dst, ptr @hello, i32 114, i64 12) ; 114 is 'r' + ret ptr %call } -define i8* @memccpy_to_memcpy2_musttail(i8* %dst, i8* %x, i32 %y, i64 %z) { +define ptr @memccpy_to_memcpy2_musttail(ptr %dst, ptr %x, i32 %y, i64 %z) { ; CHECK-LABEL: @memccpy_to_memcpy2_musttail( -; CHECK-NEXT: [[CALL:%.*]] = musttail call i8* @memccpy(i8* [[DST:%.*]], i8* nonnull getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 8) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALL:%.*]] = musttail call ptr @memccpy(ptr [[DST:%.*]], ptr nonnull @hello, i32 114, i64 8) +; CHECK-NEXT: ret ptr [[CALL]] ; - %call = musttail call i8* @memccpy(i8* %dst, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @hello, i64 0, i64 0), i32 114, i64 8) ; 114 is 'r' - ret i8* %call + %call = musttail call ptr @memccpy(ptr %dst, ptr @hello, i32 114, i64 8) ; 114 is 'r' + ret ptr %call } diff --git a/llvm/test/Transforms/InstCombine/memcmp-5.ll b/llvm/test/Transforms/InstCombine/memcmp-5.ll index 907cbbd97218d..d857b01836cad 100644 --- a/llvm/test/Transforms/InstCombine/memcmp-5.ll +++ b/llvm/test/Transforms/InstCombine/memcmp-5.ll @@ -4,7 +4,7 @@ ; Exercise folding of memcmp calls with constant arrays and nonconstant ; sizes. -declare i32 @memcmp(i8*, i8*, i64) +declare i32 @memcmp(ptr, ptr, i64) @ax = external constant [8 x i8] @a01230123 = constant [8 x i8] c"01230123" @@ -15,68 +15,65 @@ declare i32 @memcmp(i8*, i8*, i64) ; Exercise memcmp(A, B, N) folding of arrays with the same bytes. -define void @fold_memcmp_a_b_n(i32* %pcmp, i64 %n) { +define void @fold_memcmp_a_b_n(ptr %pcmp, i64 %n) { ; CHECK-LABEL: @fold_memcmp_a_b_n( -; CHECK-NEXT: store i32 0, i32* [[PCMP:%.*]], align 4 +; CHECK-NEXT: store i32 0, ptr [[PCMP:%.*]], align 4 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[N:%.*]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i32 -; CHECK-NEXT: [[S0_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1 -; CHECK-NEXT: store i32 [[TMP2]], i32* [[S0_1]], align 4 +; CHECK-NEXT: [[S0_1:%.*]] = getelementptr i32, ptr [[PCMP]], i64 1 +; CHECK-NEXT: store i32 [[TMP2]], ptr [[S0_1]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[N]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP3]] to i32 -; CHECK-NEXT: [[S0_2:%.*]] = getelementptr i32, i32* [[PCMP]], i64 2 -; CHECK-NEXT: store i32 [[TMP4]], i32* [[S0_2]], align 4 +; CHECK-NEXT: [[S0_2:%.*]] = getelementptr i32, ptr [[PCMP]], i64 2 +; CHECK-NEXT: store i32 [[TMP4]], ptr [[S0_2]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[N]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 -; CHECK-NEXT: [[S0_3:%.*]] = getelementptr i32, i32* [[PCMP]], i64 3 -; CHECK-NEXT: store i32 [[TMP6]], i32* [[S0_3]], align 4 -; CHECK-NEXT: [[S0_4:%.*]] = getelementptr i32, i32* [[PCMP]], i64 4 -; CHECK-NEXT: store i32 0, i32* [[S0_4]], align 4 +; CHECK-NEXT: [[S0_3:%.*]] = getelementptr i32, ptr [[PCMP]], i64 3 +; CHECK-NEXT: store i32 [[TMP6]], ptr [[S0_3]], align 4 +; CHECK-NEXT: [[S0_4:%.*]] = getelementptr i32, ptr [[PCMP]], i64 4 +; CHECK-NEXT: store i32 0, ptr [[S0_4]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[N]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i32 -; CHECK-NEXT: [[S0_5:%.*]] = getelementptr i32, i32* [[PCMP]], i64 5 -; CHECK-NEXT: store i32 [[TMP8]], i32* [[S0_5]], align 4 +; CHECK-NEXT: [[S0_5:%.*]] = getelementptr i32, ptr [[PCMP]], i64 5 +; CHECK-NEXT: store i32 [[TMP8]], ptr [[S0_5]], align 4 ; CHECK-NEXT: ret void ; - %p0 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, 
i64 0 - %q0 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 0 - %q1 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 1 - %q2 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 2 - %q3 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 3 - %q4 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 4 - %q5 = getelementptr [8 x i8], [8 x i8]* @b01230123, i64 0, i64 5 + %q1 = getelementptr [8 x i8], ptr @b01230123, i64 0, i64 1 + %q2 = getelementptr [8 x i8], ptr @b01230123, i64 0, i64 2 + %q3 = getelementptr [8 x i8], ptr @b01230123, i64 0, i64 3 + %q4 = getelementptr [8 x i8], ptr @b01230123, i64 0, i64 4 + %q5 = getelementptr [8 x i8], ptr @b01230123, i64 0, i64 5 ; Fold memcmp(a, b, n) to 0. - %c0_0 = call i32 @memcmp(i8* %p0, i8* %q0, i64 %n) - %s0_0 = getelementptr i32, i32* %pcmp, i64 0 - store i32 %c0_0, i32* %s0_0 + %c0_0 = call i32 @memcmp(ptr @a01230123, ptr @b01230123, i64 %n) + store i32 %c0_0, ptr %pcmp ; Fold memcmp(a, b + 1, n) to N != 0 ? -1 : 0. - %c0_1 = call i32 @memcmp(i8* %p0, i8* %q1, i64 %n) - %s0_1 = getelementptr i32, i32* %pcmp, i64 1 - store i32 %c0_1, i32* %s0_1 + %c0_1 = call i32 @memcmp(ptr @a01230123, ptr %q1, i64 %n) + %s0_1 = getelementptr i32, ptr %pcmp, i64 1 + store i32 %c0_1, ptr %s0_1 ; Fold memcmp(a, b + 2, n) to N != 0 ? -1 : 0. - %c0_2 = call i32 @memcmp(i8* %p0, i8* %q2, i64 %n) - %s0_2 = getelementptr i32, i32* %pcmp, i64 2 - store i32 %c0_2, i32* %s0_2 + %c0_2 = call i32 @memcmp(ptr @a01230123, ptr %q2, i64 %n) + %s0_2 = getelementptr i32, ptr %pcmp, i64 2 + store i32 %c0_2, ptr %s0_2 ; Fold memcmp(a, b + 3, n) to N != 0 ? -1 : 0. - %c0_3 = call i32 @memcmp(i8* %p0, i8* %q3, i64 %n) - %s0_3 = getelementptr i32, i32* %pcmp, i64 3 - store i32 %c0_3, i32* %s0_3 + %c0_3 = call i32 @memcmp(ptr @a01230123, ptr %q3, i64 %n) + %s0_3 = getelementptr i32, ptr %pcmp, i64 3 + store i32 %c0_3, ptr %s0_3 ; Fold memcmp(a, b + 4, n) to 0. - %c0_4 = call i32 @memcmp(i8* %p0, i8* %q4, i64 %n) - %s0_4 = getelementptr i32, i32* %pcmp, i64 4 - store i32 %c0_4, i32* %s0_4 + %c0_4 = call i32 @memcmp(ptr @a01230123, ptr %q4, i64 %n) + %s0_4 = getelementptr i32, ptr %pcmp, i64 4 + store i32 %c0_4, ptr %s0_4 ; Fold memcmp(a, b + 5, n) to N != 0 ? -1 : 0. - %c0_5 = call i32 @memcmp(i8* %p0, i8* %q5, i64 %n) - %s0_5 = getelementptr i32, i32* %pcmp, i64 5 - store i32 %c0_5, i32* %s0_5 + %c0_5 = call i32 @memcmp(ptr @a01230123, ptr %q5, i64 %n) + %s0_5 = getelementptr i32, ptr %pcmp, i64 5 + store i32 %c0_5, ptr %s0_5 ret void } @@ -84,20 +81,17 @@ define void @fold_memcmp_a_b_n(i32* %pcmp, i64 %n) { ; Verify that a memcmp() call involving a constant array with unknown ; contents is not folded. -define void @call_memcmp_a_ax_n(i32* %pcmp, i64 %n) { +define void @call_memcmp_a_ax_n(ptr %pcmp, i64 %n) { ; CHECK-LABEL: @call_memcmp_a_ax_n( -; CHECK-NEXT: [[C0_0:%.*]] = call i32 @memcmp(i8* nonnull getelementptr inbounds ([8 x i8], [8 x i8]* @a01230123, i64 0, i64 0), i8* nonnull getelementptr inbounds ([8 x i8], [8 x i8]* @ax, i64 0, i64 0), i64 [[N:%.*]]) -; CHECK-NEXT: store i32 [[C0_0]], i32* [[PCMP:%.*]], align 4 +; CHECK-NEXT: [[C0_0:%.*]] = call i32 @memcmp(ptr nonnull @a01230123, ptr nonnull @ax, i64 [[N:%.*]]) +; CHECK-NEXT: store i32 [[C0_0]], ptr [[PCMP:%.*]], align 4 ; CHECK-NEXT: ret void ; - %p0 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 0 - %q0 = getelementptr [8 x i8], [8 x i8]* @ax, i64 0, i64 0 ; Do not fold memcmp(a, ax, n). 
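(Why not, as a short C sketch, illustrative only: folding memcmp needs the bytes of both buffers at compile time, and @ax is only a declaration here.)

#include <string.h>

extern const char ax[8];              /* external: contents unknown        */
static const char a[8] = "01230123";  /* 8 known bytes, mirrors @a01230123 */

/* The compiler cannot evaluate this comparison, so it must stay a
   real call to memcmp. */
int keep_the_call(size_t n) {
  return memcmp(a, ax, n);
}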
- %c0_0 = call i32 @memcmp(i8* %p0, i8* %q0, i64 %n) - %s0_0 = getelementptr i32, i32* %pcmp, i64 0 - store i32 %c0_0, i32* %s0_0 + %c0_0 = call i32 @memcmp(ptr @a01230123, ptr @ax, i64 %n) + store i32 %c0_0, ptr %pcmp ret void } @@ -106,72 +100,69 @@ define void @call_memcmp_a_ax_n(i32* %pcmp, i64 %n) { ; Exercise memcmp(A, C, N) folding of arrays with the same leading bytes ; but a difference in the trailing byte. -define void @fold_memcmp_a_c_n(i32* %pcmp, i64 %n) { +define void @fold_memcmp_a_c_n(ptr %pcmp, i64 %n) { ; CHECK-LABEL: @fold_memcmp_a_c_n( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[N:%.*]], 7 ; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i32 -; CHECK-NEXT: store i32 [[TMP2]], i32* [[PCMP:%.*]], align 4 +; CHECK-NEXT: store i32 [[TMP2]], ptr [[PCMP:%.*]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[N]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP3]] to i32 -; CHECK-NEXT: [[S0_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1 -; CHECK-NEXT: store i32 [[TMP4]], i32* [[S0_1]], align 4 +; CHECK-NEXT: [[S0_1:%.*]] = getelementptr i32, ptr [[PCMP]], i64 1 +; CHECK-NEXT: store i32 [[TMP4]], ptr [[S0_1]], align 4 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[N]], 0 ; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i32 -; CHECK-NEXT: [[S0_2:%.*]] = getelementptr i32, i32* [[PCMP]], i64 2 -; CHECK-NEXT: store i32 [[TMP6]], i32* [[S0_2]], align 4 +; CHECK-NEXT: [[S0_2:%.*]] = getelementptr i32, ptr [[PCMP]], i64 2 +; CHECK-NEXT: store i32 [[TMP6]], ptr [[S0_2]], align 4 ; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[N]], 0 ; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i32 -; CHECK-NEXT: [[S0_3:%.*]] = getelementptr i32, i32* [[PCMP]], i64 3 -; CHECK-NEXT: store i32 [[TMP8]], i32* [[S0_3]], align 4 +; CHECK-NEXT: [[S0_3:%.*]] = getelementptr i32, ptr [[PCMP]], i64 3 +; CHECK-NEXT: store i32 [[TMP8]], ptr [[S0_3]], align 4 ; CHECK-NEXT: [[TMP9:%.*]] = icmp ugt i64 [[N]], 3 ; CHECK-NEXT: [[TMP10:%.*]] = sext i1 [[TMP9]] to i32 -; CHECK-NEXT: [[S0_4:%.*]] = getelementptr i32, i32* [[PCMP]], i64 4 -; CHECK-NEXT: store i32 [[TMP10]], i32* [[S0_4]], align 4 +; CHECK-NEXT: [[S0_4:%.*]] = getelementptr i32, ptr [[PCMP]], i64 4 +; CHECK-NEXT: store i32 [[TMP10]], ptr [[S0_4]], align 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp ugt i64 [[N]], 3 ; CHECK-NEXT: [[TMP12:%.*]] = sext i1 [[TMP11]] to i32 -; CHECK-NEXT: [[S0_5:%.*]] = getelementptr i32, i32* [[PCMP]], i64 5 -; CHECK-NEXT: store i32 [[TMP12]], i32* [[S0_5]], align 4 +; CHECK-NEXT: [[S0_5:%.*]] = getelementptr i32, ptr [[PCMP]], i64 5 +; CHECK-NEXT: store i32 [[TMP12]], ptr [[S0_5]], align 4 ; CHECK-NEXT: ret void ; - %p0 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 0 - %q0 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 0 - %q1 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 1 - %q2 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 2 - %q3 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 3 - %q4 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 4 - %q5 = getelementptr [8 x i8], [8 x i8]* @c01230129, i64 0, i64 5 + %q1 = getelementptr [8 x i8], ptr @c01230129, i64 0, i64 1 + %q2 = getelementptr [8 x i8], ptr @c01230129, i64 0, i64 2 + %q3 = getelementptr [8 x i8], ptr @c01230129, i64 0, i64 3 + %q4 = getelementptr [8 x i8], ptr @c01230129, i64 0, i64 4 + %q5 = getelementptr [8 x i8], ptr @c01230129, i64 0, i64 5 ; Fold memcmp(a, c, n) to N > 7 ? -1 : 0. 
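(The shape of that fold as a C sketch, illustrative only: "01230123" and "01230129" first differ at byte index 7, where '3' < '9', so only counts that reach that byte produce a nonzero result.)

#include <stddef.h>

/* What memcmp(@a01230123, @c01230129, n) folds to: bytes 0..6 are
   equal and byte 7 compares low, so the result is decided purely by
   whether n covers the first difference. */
int folded_a_c(size_t n) {
  return n > 7 ? -1 : 0;
}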
- %c0_0 = call i32 @memcmp(i8* %p0, i8* %q0, i64 %n) - %s0_0 = getelementptr i32, i32* %pcmp, i64 0 - store i32 %c0_0, i32* %s0_0 + %c0_0 = call i32 @memcmp(ptr @a01230123, ptr @c01230129, i64 %n) + store i32 %c0_0, ptr %pcmp ; Fold memcmp(a, c + 1, n) to N != 0 ? -1 : 0. - %c0_1 = call i32 @memcmp(i8* %p0, i8* %q1, i64 %n) - %s0_1 = getelementptr i32, i32* %pcmp, i64 1 - store i32 %c0_1, i32* %s0_1 + %c0_1 = call i32 @memcmp(ptr @a01230123, ptr %q1, i64 %n) + %s0_1 = getelementptr i32, ptr %pcmp, i64 1 + store i32 %c0_1, ptr %s0_1 ; Fold memcmp(a, c + 2, n) to N != 0 ? -1 : 0. - %c0_2 = call i32 @memcmp(i8* %p0, i8* %q2, i64 %n) - %s0_2 = getelementptr i32, i32* %pcmp, i64 2 - store i32 %c0_2, i32* %s0_2 + %c0_2 = call i32 @memcmp(ptr @a01230123, ptr %q2, i64 %n) + %s0_2 = getelementptr i32, ptr %pcmp, i64 2 + store i32 %c0_2, ptr %s0_2 ; Fold memcmp(a, c + 3, n) to N != 0 ? -1 : 0. - %c0_3 = call i32 @memcmp(i8* %p0, i8* %q3, i64 %n) - %s0_3 = getelementptr i32, i32* %pcmp, i64 3 - store i32 %c0_3, i32* %s0_3 + %c0_3 = call i32 @memcmp(ptr @a01230123, ptr %q3, i64 %n) + %s0_3 = getelementptr i32, ptr %pcmp, i64 3 + store i32 %c0_3, ptr %s0_3 ; Fold memcmp(a, c + 4, n) to N > 3 ? -1 : 0. - %c0_4 = call i32 @memcmp(i8* %p0, i8* %q4, i64 %n) - %s0_4 = getelementptr i32, i32* %pcmp, i64 4 - store i32 %c0_4, i32* %s0_4 + %c0_4 = call i32 @memcmp(ptr @a01230123, ptr %q4, i64 %n) + %s0_4 = getelementptr i32, ptr %pcmp, i64 4 + store i32 %c0_4, ptr %s0_4 ; Fold memcmp(a, c + 5, n) to N != 0 ? -1 : 0. - %c0_5 = call i32 @memcmp(i8* %p0, i8* %q4, i64 %n) - %s0_5 = getelementptr i32, i32* %pcmp, i64 5 - store i32 %c0_5, i32* %s0_5 + %c0_5 = call i32 @memcmp(ptr @a01230123, ptr %q4, i64 %n) + %s0_5 = getelementptr i32, ptr %pcmp, i64 5 + store i32 %c0_5, ptr %s0_5 ret void } @@ -180,49 +171,46 @@ define void @fold_memcmp_a_c_n(i32* %pcmp, i64 %n) { ; Exercise memcmp(A, D, N) folding of arrays of different sizes and ; a difference in the leading byte. 
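(As a C sketch, illustrative only: when the very first bytes already differ, the count only matters insofar as it is zero or not.)

#include <stddef.h>

/* memcmp(@a01230123, @d9123012, n): '0' < '9' at index 0, so the
   call folds to a test of whether any bytes are compared at all. */
int folded_a_d(size_t n) {
  return n != 0 ? -1 : 0;
}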
-define void @fold_memcmp_a_d_n(i32* %pcmp, i64 %n) { +define void @fold_memcmp_a_d_n(ptr %pcmp, i64 %n) { ; CHECK-LABEL: @fold_memcmp_a_d_n( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 [[N:%.*]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = sext i1 [[TMP1]] to i32 -; CHECK-NEXT: store i32 [[TMP2]], i32* [[PCMP:%.*]], align 4 +; CHECK-NEXT: store i32 [[TMP2]], ptr [[PCMP:%.*]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[N]], 0 ; CHECK-NEXT: [[TMP4:%.*]] = sext i1 [[TMP3]] to i32 -; CHECK-NEXT: [[S0_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 1 -; CHECK-NEXT: store i32 [[TMP4]], i32* [[S0_1]], align 4 -; CHECK-NEXT: [[S1_1:%.*]] = getelementptr i32, i32* [[PCMP]], i64 2 -; CHECK-NEXT: store i32 0, i32* [[S1_1]], align 4 -; CHECK-NEXT: [[S6_6:%.*]] = getelementptr i32, i32* [[PCMP]], i64 3 -; CHECK-NEXT: store i32 0, i32* [[S6_6]], align 4 +; CHECK-NEXT: [[S0_1:%.*]] = getelementptr i32, ptr [[PCMP]], i64 1 +; CHECK-NEXT: store i32 [[TMP4]], ptr [[S0_1]], align 4 +; CHECK-NEXT: [[S1_1:%.*]] = getelementptr i32, ptr [[PCMP]], i64 2 +; CHECK-NEXT: store i32 0, ptr [[S1_1]], align 4 +; CHECK-NEXT: [[S6_6:%.*]] = getelementptr i32, ptr [[PCMP]], i64 3 +; CHECK-NEXT: store i32 0, ptr [[S6_6]], align 4 ; CHECK-NEXT: ret void ; - %p0 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 0 - %p1 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 1 - %p6 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 6 + %p1 = getelementptr [8 x i8], ptr @a01230123, i64 0, i64 1 + %p6 = getelementptr [8 x i8], ptr @a01230123, i64 0, i64 6 - %q0 = getelementptr [7 x i8], [7 x i8]* @d9123012, i64 0, i64 0 - %q1 = getelementptr [7 x i8], [7 x i8]* @d9123012, i64 0, i64 1 - %q6 = getelementptr [7 x i8], [7 x i8]* @d9123012, i64 0, i64 6 + %q1 = getelementptr [7 x i8], ptr @d9123012, i64 0, i64 1 + %q6 = getelementptr [7 x i8], ptr @d9123012, i64 0, i64 6 ; Fold memcmp(a, d, n) to N != 0 ? -1 : 0. - %c0_0 = call i32 @memcmp(i8* %p0, i8* %q0, i64 %n) - %s0_0 = getelementptr i32, i32* %pcmp, i64 0 - store i32 %c0_0, i32* %s0_0 + %c0_0 = call i32 @memcmp(ptr @a01230123, ptr @d9123012, i64 %n) + store i32 %c0_0, ptr %pcmp ; Fold memcmp(a, d + 1, n) to N != 0 ? -1 : 0. - %c0_1 = call i32 @memcmp(i8* %p0, i8* %q1, i64 %n) - %s0_1 = getelementptr i32, i32* %pcmp, i64 1 - store i32 %c0_1, i32* %s0_1 + %c0_1 = call i32 @memcmp(ptr @a01230123, ptr %q1, i64 %n) + %s0_1 = getelementptr i32, ptr %pcmp, i64 1 + store i32 %c0_1, ptr %s0_1 ; Fold memcmp(a + 1, d + 1, n) to 0. - %c1_1 = call i32 @memcmp(i8* %p1, i8* %q1, i64 %n) - %s1_1 = getelementptr i32, i32* %pcmp, i64 2 - store i32 %c1_1, i32* %s1_1 + %c1_1 = call i32 @memcmp(ptr %p1, ptr %q1, i64 %n) + %s1_1 = getelementptr i32, ptr %pcmp, i64 2 + store i32 %c1_1, ptr %s1_1 ; Fold memcmp(a + 6, d + 6, n) to 0. - %c6_6 = call i32 @memcmp(i8* %p6, i8* %q6, i64 %n) - %s6_6 = getelementptr i32, i32* %pcmp, i64 3 - store i32 %c6_6, i32* %s6_6 + %c6_6 = call i32 @memcmp(ptr %p6, ptr %q6, i64 %n) + %s6_6 = getelementptr i32, ptr %pcmp, i64 3 + store i32 %c6_6, ptr %s6_6 ret void } @@ -231,19 +219,16 @@ define void @fold_memcmp_a_d_n(i32* %pcmp, i64 %n) { ; Exercise memcmp(A, D, N) folding of arrays with the same bytes and ; a nonzero size. 
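(C sketch, illustrative only: or-ing the count with 1 makes it provably nonzero, so the N != 0 ? -1 : 0 fold from the previous test collapses to the constant -1 that the CHECK line below stores.)

#include <stddef.h>

/* n | 1 is never zero, so the select degenerates to a constant. */
int folded_a_d_nz(size_t n) {
  size_t nz = n | 1;
  return nz != 0 ? -1 : 0;  /* always -1 */
}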
-define void @fold_memcmp_a_d_nz(i32* %pcmp, i64 %n) { +define void @fold_memcmp_a_d_nz(ptr %pcmp, i64 %n) { ; CHECK-LABEL: @fold_memcmp_a_d_nz( -; CHECK-NEXT: store i32 -1, i32* [[PCMP:%.*]], align 4 +; CHECK-NEXT: store i32 -1, ptr [[PCMP:%.*]], align 4 ; CHECK-NEXT: ret void ; - %p0 = getelementptr [8 x i8], [8 x i8]* @a01230123, i64 0, i64 0 - %q0 = getelementptr [7 x i8], [7 x i8]* @d9123012, i64 0, i64 0 %nz = or i64 %n, 1 - %c0_0 = call i32 @memcmp(i8* %p0, i8* %q0, i64 %nz) - %s0_0 = getelementptr i32, i32* %pcmp, i64 0 - store i32 %c0_0, i32* %s0_0 + %c0_0 = call i32 @memcmp(ptr @a01230123, ptr @d9123012, i64 %nz) + store i32 %c0_0, ptr %pcmp ret void } diff --git a/llvm/test/Transforms/InstCombine/memmove.ll b/llvm/test/Transforms/InstCombine/memmove.ll index 22fff289d18ff..26e967d8957ab 100644 --- a/llvm/test/Transforms/InstCombine/memmove.ll +++ b/llvm/test/Transforms/InstCombine/memmove.ll @@ -3,68 +3,64 @@ ; ; RUN: opt < %s -passes=instcombine -S | FileCheck %s -@S = internal constant [33 x i8] c"panic: restorelist inconsistency\00" ; <[33 x i8]*> [#uses=1] -@h = constant [2 x i8] c"h\00" ; <[2 x i8]*> [#uses=1] -@hel = constant [4 x i8] c"hel\00" ; <[4 x i8]*> [#uses=1] -@hello_u = constant [8 x i8] c"hello_u\00" ; <[8 x i8]*> [#uses=1] +@S = internal constant [33 x i8] c"panic: restorelist inconsistency\00" ; [#uses=1] +@h = constant [2 x i8] c"h\00" ; [#uses=1] +@hel = constant [4 x i8] c"hel\00" ; [#uses=1] +@hello_u = constant [8 x i8] c"hello_u\00" ; [#uses=1] -define void @test1(i8* %A, i8* %B, i32 %N) { +define void @test1(ptr %A, ptr %B, i32 %N) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: ret void ; - call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* %B, i32 0, i1 false) + call void @llvm.memmove.p0.p0.i32(ptr %A, ptr %B, i32 0, i1 false) ret void } -define void @test2(i8* %A, i32 %N) { +define void @test2(ptr %A, i32 %N) { ;; dest can't alias source since we can't write to source! 
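(That argument in C, a sketch under the usual assumption that writing to a const object is undefined behavior: in any well-defined execution the destination cannot overlap a read-only global, so memmove may be lowered to memcpy.)

#include <string.h>

static const char S[] = "panic: restorelist inconsistency";

/* If dst overlapped S, the copy would write into S, which is UB for
   a const object; hence no-overlap may be assumed. */
void demo_memmove(char *dst, size_t n) {
  memmove(dst, S, n);   /* foldable to memcpy(dst, S, n) */
}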
; CHECK-LABEL: @test2( -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 [[A:%.*]], i8* nonnull align 16 getelementptr inbounds ([33 x i8], [33 x i8]* @S, i64 0, i64 0), i32 [[N:%.*]], i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A:%.*]], ptr nonnull align 16 @S, i32 [[N:%.*]], i1 false) ; CHECK-NEXT: ret void ; - call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* getelementptr inbounds ([33 x i8], [33 x i8]* @S, i32 0, i32 0), i32 %N, i1 false) + call void @llvm.memmove.p0.p0.i32(ptr %A, ptr @S, i32 %N, i1 false) ret void } -define i32 @test3([1024 x i8]* %target) { ; arg: [1024 x i8]*> [#uses=1] +define i32 @test3(ptr %target) { ; arg: ptr> [#uses=1] ; CHECK-LABEL: @test3( -; CHECK-NEXT: [[TMP1:%.*]] = bitcast [1024 x i8]* [[TARGET:%.*]] to i16* -; CHECK-NEXT: store i16 104, i16* [[TMP1]], align 2 -; CHECK-NEXT: [[TMP2:%.*]] = bitcast [1024 x i8]* [[TARGET]] to i32* -; CHECK-NEXT: store i32 7103848, i32* [[TMP2]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast [1024 x i8]* [[TARGET]] to i64* -; CHECK-NEXT: store i64 33037504440198504, i64* [[TMP3]], align 8 +; CHECK-NEXT: store i16 104, ptr [[TARGET:%.*]], align 2 +; CHECK-NEXT: store i32 7103848, ptr [[TARGET]], align 4 +; CHECK-NEXT: store i64 33037504440198504, ptr [[TARGET]], align 8 ; CHECK-NEXT: ret i32 0 ; - %h_p = getelementptr [2 x i8], [2 x i8]* @h, i32 0, i32 0 ; [#uses=1] - %hel_p = getelementptr [4 x i8], [4 x i8]* @hel, i32 0, i32 0 ; [#uses=1] - %hello_u_p = getelementptr [8 x i8], [8 x i8]* @hello_u, i32 0, i32 0 ; [#uses=1] - %target_p = getelementptr [1024 x i8], [1024 x i8]* %target, i32 0, i32 0 ; [#uses=3] - call void @llvm.memmove.p0i8.p0i8.i32(i8* align 2 %target_p, i8* align 2 %h_p, i32 2, i1 false) - call void @llvm.memmove.p0i8.p0i8.i32(i8* align 4 %target_p, i8* align 4 %hel_p, i32 4, i1 false) - call void @llvm.memmove.p0i8.p0i8.i32(i8* align 8 %target_p, i8* align 8 %hello_u_p, i32 8, i1 false) + %h_p = getelementptr [2 x i8], ptr @h, i32 0, i32 0 ; [#uses=1] + %hel_p = getelementptr [4 x i8], ptr @hel, i32 0, i32 0 ; [#uses=1] + %hello_u_p = getelementptr [8 x i8], ptr @hello_u, i32 0, i32 0 ; [#uses=1] + %target_p = getelementptr [1024 x i8], ptr %target, i32 0, i32 0 ; [#uses=3] + call void @llvm.memmove.p0.p0.i32(ptr align 2 %target_p, ptr align 2 %h_p, i32 2, i1 false) + call void @llvm.memmove.p0.p0.i32(ptr align 4 %target_p, ptr align 4 %hel_p, i32 4, i1 false) + call void @llvm.memmove.p0.p0.i32(ptr align 8 %target_p, ptr align 8 %hello_u_p, i32 8, i1 false) ret i32 0 } ; PR2370 -define void @test4(i8* %a) { +define void @test4(ptr %a) { ; CHECK-LABEL: @test4( ; CHECK-NEXT: ret void ; - tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i1 false) + tail call void @llvm.memmove.p0.p0.i32(ptr %a, ptr %a, i32 100, i1 false) ret void } @UnknownConstant = external constant i128 -define void @memmove_to_constant(i8* %src) { +define void @memmove_to_constant(ptr %src) { ; CHECK-LABEL: @memmove_to_constant( ; CHECK-NEXT: ret void ; - %dest = bitcast i128* @UnknownConstant to i8* - call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 16, i1 false) + call void @llvm.memmove.p0.p0.i32(ptr @UnknownConstant, ptr %src, i32 16, i1 false) ret void } -declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i1) argmemonly nounwind +declare void @llvm.memmove.p0.p0.i32(ptr nocapture, ptr nocapture readonly, i32, i1) argmemonly nounwind diff --git a/llvm/test/Transforms/InstCombine/memmove_chk-1.ll 
b/llvm/test/Transforms/InstCombine/memmove_chk-1.ll index 3bf25aa8eefdd..fc85f21ad3602 100644 --- a/llvm/test/Transforms/InstCombine/memmove_chk-1.ll +++ b/llvm/test/Transforms/InstCombine/memmove_chk-1.ll @@ -16,87 +16,75 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; Check cases where dstlen >= len. -define i8* @test_simplify1() { +define ptr @test_simplify1() { ; CHECK-LABEL: @test_simplify1( -; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T1* @t1 to i8*), i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false) -; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*) +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(1824) @t1, ptr noundef nonnull align 4 dereferenceable(1824) @t2, i64 1824, i1 false) +; CHECK-NEXT: ret ptr @t1 ; - %dst = bitcast %struct.T1* @t1 to i8* - %src = bitcast %struct.T2* @t2 to i8* - %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824) - ret i8* %ret + %ret = call ptr @__memmove_chk(ptr @t1, ptr @t2, i64 1824, i64 1824) + ret ptr %ret } -define i8* @test_simplify2() { +define ptr @test_simplify2() { ; CHECK-LABEL: @test_simplify2( -; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T1* @t1 to i8*), i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T3* @t3 to i8*), i64 1824, i1 false) -; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*) +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(1824) @t1, ptr noundef nonnull align 4 dereferenceable(1824) @t3, i64 1824, i1 false) +; CHECK-NEXT: ret ptr @t1 ; - %dst = bitcast %struct.T1* @t1 to i8* - %src = bitcast %struct.T3* @t3 to i8* - %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 2848) - ret i8* %ret + %ret = call ptr @__memmove_chk(ptr @t1, ptr @t3, i64 1824, i64 2848) + ret ptr %ret } -define i8* @test_simplify3() { +define ptr @test_simplify3() { ; CHECK-LABEL: @test_simplify3( -; CHECK-NEXT: tail call void @llvm.memmove.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T1* @t1 to i8*), i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false) -; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*) +; CHECK-NEXT: tail call void @llvm.memmove.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(1824) @t1, ptr noundef nonnull align 4 dereferenceable(1824) @t2, i64 1824, i1 false) +; CHECK-NEXT: ret ptr @t1 ; - %dst = bitcast %struct.T1* @t1 to i8* - %src = bitcast %struct.T2* @t2 to i8* - %ret = tail call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824) - ret i8* %ret + %ret = tail call ptr @__memmove_chk(ptr @t1, ptr @t2, i64 1824, i64 1824) + ret ptr %ret } ; Check cases where dstlen < len. 
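(For reference, a hypothetical C model of the fortified call, not the actual libc implementation: __memmove_chk(dst, src, len, dstlen) may be simplified to a plain memmove only when dstlen is provably at least len; the cases below either violate that bound or leave it unknown, so the call is kept.)

#include <stdlib.h>
#include <string.h>

/* Hypothetical model: dstlen is the compiler-recorded size of the
   destination object; the guard stands in for the runtime check. */
void *memmove_chk_model(void *dst, const void *src,
                        size_t len, size_t dstlen) {
  if (dstlen < len)
    abort();
  return memmove(dst, src, len);
}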
-define i8* @test_no_simplify1() { +define ptr @test_no_simplify1() { ; CHECK-LABEL: @test_no_simplify1( -; CHECK-NEXT: [[RET:%.*]] = call i8* @__memmove_chk(i8* nonnull bitcast (%struct.T3* @t3 to i8*), i8* nonnull bitcast (%struct.T1* @t1 to i8*), i64 2848, i64 1824) -; CHECK-NEXT: ret i8* [[RET]] +; CHECK-NEXT: [[RET:%.*]] = call ptr @__memmove_chk(ptr nonnull @t3, ptr nonnull @t1, i64 2848, i64 1824) +; CHECK-NEXT: ret ptr [[RET]] ; - %dst = bitcast %struct.T3* @t3 to i8* - %src = bitcast %struct.T1* @t1 to i8* - %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 2848, i64 1824) - ret i8* %ret + %ret = call ptr @__memmove_chk(ptr @t3, ptr @t1, i64 2848, i64 1824) + ret ptr %ret } -define i8* @test_no_simplify2() { +define ptr @test_no_simplify2() { ; CHECK-LABEL: @test_no_simplify2( -; CHECK-NEXT: [[RET:%.*]] = call i8* @__memmove_chk(i8* nonnull bitcast (%struct.T1* @t1 to i8*), i8* nonnull bitcast (%struct.T2* @t2 to i8*), i64 1024, i64 0) -; CHECK-NEXT: ret i8* [[RET]] +; CHECK-NEXT: [[RET:%.*]] = call ptr @__memmove_chk(ptr nonnull @t1, ptr nonnull @t2, i64 1024, i64 0) +; CHECK-NEXT: ret ptr [[RET]] ; - %dst = bitcast %struct.T1* @t1 to i8* - %src = bitcast %struct.T2* @t2 to i8* - %ret = call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1024, i64 0) - ret i8* %ret + %ret = call ptr @__memmove_chk(ptr @t1, ptr @t2, i64 1024, i64 0) + ret ptr %ret } -define i8* @test_no_simplify3(i8* %dst, i8* %src, i64 %a, i64 %b) { +define ptr @test_no_simplify3(ptr %dst, ptr %src, i64 %a, i64 %b) { ; CHECK-LABEL: @test_no_simplify3( -; CHECK-NEXT: [[RET:%.*]] = musttail call i8* @__memmove_chk(i8* [[DST:%.*]], i8* [[SRC:%.*]], i64 1824, i64 1824) -; CHECK-NEXT: ret i8* [[RET]] +; CHECK-NEXT: [[RET:%.*]] = musttail call ptr @__memmove_chk(ptr [[DST:%.*]], ptr [[SRC:%.*]], i64 1824, i64 1824) +; CHECK-NEXT: ret ptr [[RET]] ; - %ret = musttail call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824) - ret i8* %ret + %ret = musttail call ptr @__memmove_chk(ptr %dst, ptr %src, i64 1824, i64 1824) + ret ptr %ret } -define i8* @test_no_incompatible_attr(i8* %mem, i32 %val, i32 %size) { +define ptr @test_no_incompatible_attr(ptr %mem, i32 %val, i32 %size) { ; CHECK-LABEL: @test_no_incompatible_attr( -; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T1* @t1 to i8*), i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T2* @t2 to i8*), i64 1824, i1 false) -; CHECK-NEXT: ret i8* bitcast (%struct.T1* @t1 to i8*) +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr noundef nonnull align 4 dereferenceable(1824) @t1, ptr noundef nonnull align 4 dereferenceable(1824) @t2, i64 1824, i1 false) +; CHECK-NEXT: ret ptr @t1 ; - %dst = bitcast %struct.T1* @t1 to i8* - %src = bitcast %struct.T2* @t2 to i8* - %ret = call dereferenceable(1) i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824) - ret i8* %ret + %ret = call dereferenceable(1) ptr @__memmove_chk(ptr @t1, ptr @t2, i64 1824, i64 1824) + ret ptr %ret } -declare i8* @__memmove_chk(i8*, i8*, i64, i64) +declare ptr @__memmove_chk(ptr, ptr, i64, i64) diff --git a/llvm/test/Transforms/InstCombine/memrchr-3.ll b/llvm/test/Transforms/InstCombine/memrchr-3.ll index 859bdfc0356e8..ca122e5b7deab 100644 --- a/llvm/test/Transforms/InstCombine/memrchr-3.ll +++ b/llvm/test/Transforms/InstCombine/memrchr-3.ll @@ -4,7 +4,7 @@ ; Verify that memrchr calls with one or more constant arguments are folded ; as expected. 
-declare i8* @memrchr(i8*, i32, i64) +declare ptr @memrchr(ptr, i32, i64) @ax = external global [0 x i8] @a12345 = constant [5 x i8] c"\01\02\03\04\05" @@ -13,288 +13,268 @@ declare i8* @memrchr(i8*, i32, i64) ; Fold memrchr(ax, C, 0) to null. -define i8* @fold_memrchr_ax_c_0(i32 %C) { +define ptr @fold_memrchr_ax_c_0(i32 %C) { ; CHECK-LABEL: @fold_memrchr_ax_c_0( -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: ret ptr null ; - %ptr = getelementptr [0 x i8], [0 x i8]* @ax, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 %C, i64 0) - ret i8* %ret + %ret = call ptr @memrchr(ptr @ax, i32 %C, i64 0) + ret ptr %ret } ; Fold memrchr(a12345, 3, 0) to null. -define i8* @fold_memrchr_a12345_3_0() { +define ptr @fold_memrchr_a12345_3_0() { ; CHECK-LABEL: @fold_memrchr_a12345_3_0( -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: ret ptr null ; - %ptr = getelementptr [5 x i8], [5 x i8]* @a12345, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 3, i64 0) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a12345, i32 3, i64 0) + ret ptr %ret } ; Fold memrchr(a12345, 1, 1) to a12345. -define i8* @fold_memrchr_a12345_1_1() { +define ptr @fold_memrchr_a12345_1_1() { ; CHECK-LABEL: @fold_memrchr_a12345_1_1( -; CHECK-NEXT: ret i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a12345, i64 0, i64 0) +; CHECK-NEXT: ret ptr @a12345 ; - %ptr = getelementptr [5 x i8], [5 x i8]* @a12345, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 1, i64 1) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a12345, i32 1, i64 1) + ret ptr %ret } ; Fold memrchr(a12345, 5, 1) to null. -define i8* @fold_memrchr_a12345_5_1() { +define ptr @fold_memrchr_a12345_5_1() { ; CHECK-LABEL: @fold_memrchr_a12345_5_1( -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: ret ptr null ; - %ptr = getelementptr [5 x i8], [5 x i8]* @a12345, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 5, i64 1) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a12345, i32 5, i64 1) + ret ptr %ret } ; Fold memrchr(a123123, 1, 1) to a123123. -define i8* @fold_memrchr_a123123_1_1() { +define ptr @fold_memrchr_a123123_1_1() { ; CHECK-LABEL: @fold_memrchr_a123123_1_1( -; CHECK-NEXT: ret i8* getelementptr inbounds ([6 x i8], [6 x i8]* @a123123, i64 0, i64 0) +; CHECK-NEXT: ret ptr @a123123 ; - %ptr = getelementptr [6 x i8], [6 x i8]* @a123123, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 1, i64 1) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a123123, i32 1, i64 1) + ret ptr %ret } ; Fold memrchr(a123123, 3, 1) to null. -define i8* @fold_memrchr_a123123_3_1() { +define ptr @fold_memrchr_a123123_3_1() { ; CHECK-LABEL: @fold_memrchr_a123123_3_1( -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: ret ptr null ; - %ptr = getelementptr [6 x i8], [6 x i8]* @a123123, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 3, i64 1) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a123123, i32 3, i64 1) + ret ptr %ret } ; Fold memrchr(ax, C, 1) to *ax == C ? ax : null. 
-define i8* @fold_memrchr_ax_c_1(i32 %C) { +define ptr @fold_memrchr_ax_c_1(i32 %C) { ; CHECK-LABEL: @fold_memrchr_ax_c_1( -; CHECK-NEXT: [[MEMRCHR_CHAR0:%.*]] = load i8, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @ax, i64 0, i64 0), align 1 +; CHECK-NEXT: [[MEMRCHR_CHAR0:%.*]] = load i8, ptr @ax, align 1 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[C:%.*]] to i8 ; CHECK-NEXT: [[MEMRCHR_CHAR0CMP:%.*]] = icmp eq i8 [[MEMRCHR_CHAR0]], [[TMP1]] -; CHECK-NEXT: [[MEMRCHR_SEL:%.*]] = select i1 [[MEMRCHR_CHAR0CMP]], i8* getelementptr inbounds ([0 x i8], [0 x i8]* @ax, i64 0, i64 0), i8* null -; CHECK-NEXT: ret i8* [[MEMRCHR_SEL]] +; CHECK-NEXT: [[MEMRCHR_SEL:%.*]] = select i1 [[MEMRCHR_CHAR0CMP]], ptr @ax, ptr null +; CHECK-NEXT: ret ptr [[MEMRCHR_SEL]] ; - %ptr = getelementptr [0 x i8], [0 x i8]* @ax, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 %C, i64 1) - ret i8* %ret + %ret = call ptr @memrchr(ptr @ax, i32 %C, i64 1) + ret ptr %ret } ; Fold memrchr(a12345, 5, 5) to a12345 + 4. -define i8* @fold_memrchr_a12345_5_5() { +define ptr @fold_memrchr_a12345_5_5() { ; CHECK-LABEL: @fold_memrchr_a12345_5_5( -; CHECK-NEXT: ret i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a12345, i64 0, i64 4) +; CHECK-NEXT: ret ptr getelementptr inbounds ([5 x i8], ptr @a12345, i64 0, i64 4) ; - %ptr = getelementptr [5 x i8], [5 x i8]* @a12345, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 5, i64 5) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a12345, i32 5, i64 5) + ret ptr %ret } ; Fold memrchr(a12345, 5, 4) to null. -define i8* @fold_memrchr_a12345_5_4() { +define ptr @fold_memrchr_a12345_5_4() { ; CHECK-LABEL: @fold_memrchr_a12345_5_4( -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: ret ptr null ; - %ptr = getelementptr [5 x i8], [5 x i8]* @a12345, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 5, i64 4) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a12345, i32 5, i64 4) + ret ptr %ret } ; Fold memrchr(a12345, 4, 5) to a12345 + 3. -define i8* @fold_memrchr_a12345_4_5() { +define ptr @fold_memrchr_a12345_4_5() { ; CHECK-LABEL: @fold_memrchr_a12345_4_5( -; CHECK-NEXT: ret i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a12345, i64 0, i64 3) +; CHECK-NEXT: ret ptr getelementptr inbounds ([5 x i8], ptr @a12345, i64 0, i64 3) ; - %ptr = getelementptr [5 x i8], [5 x i8]* @a12345, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 4, i64 5) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a12345, i32 4, i64 5) + ret ptr %ret } ; Fold memrchr(a12345 + 1, 1, 4) to null. -define i8* @fold_memrchr_a12345p1_1_4() { +define ptr @fold_memrchr_a12345p1_1_4() { ; CHECK-LABEL: @fold_memrchr_a12345p1_1_4( -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: ret ptr null ; - %ptr = getelementptr [5 x i8], [5 x i8]* @a12345, i32 0, i32 1 - %ret = call i8* @memrchr(i8* %ptr, i32 1, i64 4) - ret i8* %ret + %ptr = getelementptr [5 x i8], ptr @a12345, i32 0, i32 1 + %ret = call ptr @memrchr(ptr %ptr, i32 1, i64 4) + ret ptr %ret } ; Fold memrchr(a12345 + 1, 2, 4) to a12345 + 1. 
-define i8* @fold_memrchr_a12345p1_2_4() { +define ptr @fold_memrchr_a12345p1_2_4() { ; CHECK-LABEL: @fold_memrchr_a12345p1_2_4( -; CHECK-NEXT: ret i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a12345, i64 0, i64 1) +; CHECK-NEXT: ret ptr getelementptr inbounds ([5 x i8], ptr @a12345, i64 0, i64 1) ; - %ptr = getelementptr [5 x i8], [5 x i8]* @a12345, i32 0, i32 1 - %ret = call i8* @memrchr(i8* %ptr, i32 2, i64 4) - ret i8* %ret + %ptr = getelementptr [5 x i8], ptr @a12345, i32 0, i32 1 + %ret = call ptr @memrchr(ptr %ptr, i32 2, i64 4) + ret ptr %ret } ; Fold memrchr(a12345, 2, 5) to a12345 + 1. -define i8* @fold_memrchr_a12345_2_5() { +define ptr @fold_memrchr_a12345_2_5() { ; CHECK-LABEL: @fold_memrchr_a12345_2_5( -; CHECK-NEXT: ret i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a12345, i64 0, i64 1) +; CHECK-NEXT: ret ptr getelementptr inbounds ([5 x i8], ptr @a12345, i64 0, i64 1) ; - %ptr = getelementptr [5 x i8], [5 x i8]* @a12345, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 2, i64 5) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a12345, i32 2, i64 5) + ret ptr %ret } ; Fold memrchr(a12345, 0, %N) to null. -define i8* @fold_memrchr_a12345_0_n(i64 %N) { +define ptr @fold_memrchr_a12345_0_n(i64 %N) { ; CHECK-LABEL: @fold_memrchr_a12345_0_n( -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: ret ptr null ; - %ptr = getelementptr [5 x i8], [5 x i8]* @a12345, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 0, i64 %N) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a12345, i32 0, i64 %N) + ret ptr %ret } ; Fold memrchr(a12345, 3, n) to n < 3 ? null : s + 2. -define i8* @fold_memrchr_a12345_3_n(i64 %n) { +define ptr @fold_memrchr_a12345_3_n(i64 %n) { ; CHECK-LABEL: @fold_memrchr_a12345_3_n( ; CHECK-NEXT: [[MEMRCHR_CMP:%.*]] = icmp ult i64 [[N:%.*]], 3 -; CHECK-NEXT: [[MEMRCHR_SEL:%.*]] = select i1 [[MEMRCHR_CMP]], i8* null, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a12345, i64 0, i64 2) -; CHECK-NEXT: ret i8* [[MEMRCHR_SEL]] +; CHECK-NEXT: [[MEMRCHR_SEL:%.*]] = select i1 [[MEMRCHR_CMP]], ptr null, ptr getelementptr inbounds ([5 x i8], ptr @a12345, i64 0, i64 2) +; CHECK-NEXT: ret ptr [[MEMRCHR_SEL]] ; - %ptr = getelementptr [5 x i8], [5 x i8]* @a12345, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 3, i64 %n) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a12345, i32 3, i64 %n) + ret ptr %ret } ; Fold memrchr(a12345, 5, n) to n < 5 ? null : s + 4. -define i8* @fold_memrchr_a12345_5_n(i64 %n) { +define ptr @fold_memrchr_a12345_5_n(i64 %n) { ; CHECK-LABEL: @fold_memrchr_a12345_5_n( ; CHECK-NEXT: [[MEMRCHR_CMP:%.*]] = icmp ult i64 [[N:%.*]], 5 -; CHECK-NEXT: [[MEMRCHR_SEL:%.*]] = select i1 [[MEMRCHR_CMP]], i8* null, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @a12345, i64 0, i64 4) -; CHECK-NEXT: ret i8* [[MEMRCHR_SEL]] +; CHECK-NEXT: [[MEMRCHR_SEL:%.*]] = select i1 [[MEMRCHR_CMP]], ptr null, ptr getelementptr inbounds ([5 x i8], ptr @a12345, i64 0, i64 4) +; CHECK-NEXT: ret ptr [[MEMRCHR_SEL]] ; - %ptr = getelementptr [5 x i8], [5 x i8]* @a12345, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 5, i64 %n) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a12345, i32 5, i64 %n) + ret ptr %ret } ; Fold memrchr(a123123, 3, 5) to a123123 + 2. 
-define i8* @fold_memrchr_a123123_3_5() { +define ptr @fold_memrchr_a123123_3_5() { ; CHECK-LABEL: @fold_memrchr_a123123_3_5( -; CHECK-NEXT: ret i8* getelementptr inbounds ([6 x i8], [6 x i8]* @a123123, i64 0, i64 2) +; CHECK-NEXT: ret ptr getelementptr inbounds ([6 x i8], ptr @a123123, i64 0, i64 2) ; - %ptr = getelementptr [6 x i8], [6 x i8]* @a123123, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 3, i64 5) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a123123, i32 3, i64 5) + ret ptr %ret } ; Fold memrchr(a123123, 3, 6) to a123123 + 5. -define i8* @fold_memrchr_a123123_3_6() { +define ptr @fold_memrchr_a123123_3_6() { ; CHECK-LABEL: @fold_memrchr_a123123_3_6( -; CHECK-NEXT: ret i8* getelementptr inbounds ([6 x i8], [6 x i8]* @a123123, i64 0, i64 5) +; CHECK-NEXT: ret ptr getelementptr inbounds ([6 x i8], ptr @a123123, i64 0, i64 5) ; - %ptr = getelementptr [6 x i8], [6 x i8]* @a123123, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 3, i64 6) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a123123, i32 3, i64 6) + ret ptr %ret } ; Fold memrchr(a123123, 2, 6) to a123123 + 4. -define i8* @fold_memrchr_a123123_2_6() { +define ptr @fold_memrchr_a123123_2_6() { ; CHECK-LABEL: @fold_memrchr_a123123_2_6( -; CHECK-NEXT: ret i8* getelementptr inbounds ([6 x i8], [6 x i8]* @a123123, i64 0, i64 4) +; CHECK-NEXT: ret ptr getelementptr inbounds ([6 x i8], ptr @a123123, i64 0, i64 4) ; - %ptr = getelementptr [6 x i8], [6 x i8]* @a123123, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 2, i64 6) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a123123, i32 2, i64 6) + ret ptr %ret } ; Fold memrchr(a123123, 1, 6) to a123123 + 3. -define i8* @fold_memrchr_a123123_1_6() { +define ptr @fold_memrchr_a123123_1_6() { ; CHECK-LABEL: @fold_memrchr_a123123_1_6( -; CHECK-NEXT: ret i8* getelementptr inbounds ([6 x i8], [6 x i8]* @a123123, i64 0, i64 3) +; CHECK-NEXT: ret ptr getelementptr inbounds ([6 x i8], ptr @a123123, i64 0, i64 3) ; - %ptr = getelementptr [6 x i8], [6 x i8]* @a123123, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 1, i64 6) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a123123, i32 1, i64 6) + ret ptr %ret } ; Fold memrchr(a123123, 0, 6) to null. -define i8* @fold_memrchr_a123123_0_6() { +define ptr @fold_memrchr_a123123_0_6() { ; CHECK-LABEL: @fold_memrchr_a123123_0_6( -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: ret ptr null ; - %ptr = getelementptr [6 x i8], [6 x i8]* @a123123, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 0, i64 6) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a123123, i32 0, i64 6) + ret ptr %ret } ; Fold memrchr(a123123, 0, n) to null -define i8* @fold_memrchr_a123123_0_n(i64 %n) { +define ptr @fold_memrchr_a123123_0_n(i64 %n) { ; CHECK-LABEL: @fold_memrchr_a123123_0_n( -; CHECK-NEXT: ret i8* null +; CHECK-NEXT: ret ptr null ; - %ptr = getelementptr [6 x i8], [6 x i8]* @a123123, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 0, i64 %n) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a123123, i32 0, i64 %n) + ret ptr %ret } @@ -302,41 +282,38 @@ define i8* @fold_memrchr_a123123_0_n(i64 %n) { ; for a small number of occurrences of the character greater than one, it's ; less and less profitable as the number grows). 
-define i8* @call_memrchr_a123123_3_n(i64 %n) { +define ptr @call_memrchr_a123123_3_n(i64 %n) { ; CHECK-LABEL: @call_memrchr_a123123_3_n( -; CHECK-NEXT: [[RET:%.*]] = call i8* @memrchr(i8* nonnull getelementptr inbounds ([6 x i8], [6 x i8]* @a123123, i64 0, i64 0), i32 3, i64 [[N:%.*]]) -; CHECK-NEXT: ret i8* [[RET]] +; CHECK-NEXT: [[RET:%.*]] = call ptr @memrchr(ptr nonnull @a123123, i32 3, i64 [[N:%.*]]) +; CHECK-NEXT: ret ptr [[RET]] ; - %ptr = getelementptr [6 x i8], [6 x i8]* @a123123, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 3, i64 %n) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a123123, i32 3, i64 %n) + ret ptr %ret } ; Same as above but for 2. -define i8* @call_memrchr_a123123_2_n(i64 %n) { +define ptr @call_memrchr_a123123_2_n(i64 %n) { ; CHECK-LABEL: @call_memrchr_a123123_2_n( -; CHECK-NEXT: [[RET:%.*]] = call i8* @memrchr(i8* nonnull getelementptr inbounds ([6 x i8], [6 x i8]* @a123123, i64 0, i64 0), i32 2, i64 [[N:%.*]]) -; CHECK-NEXT: ret i8* [[RET]] +; CHECK-NEXT: [[RET:%.*]] = call ptr @memrchr(ptr nonnull @a123123, i32 2, i64 [[N:%.*]]) +; CHECK-NEXT: ret ptr [[RET]] ; - %ptr = getelementptr [6 x i8], [6 x i8]* @a123123, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 2, i64 %n) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a123123, i32 2, i64 %n) + ret ptr %ret } ; And again for 1 to exercise the other edge case. -define i8* @call_memrchr_a123123_1_n(i64 %n) { +define ptr @call_memrchr_a123123_1_n(i64 %n) { ; CHECK-LABEL: @call_memrchr_a123123_1_n( -; CHECK-NEXT: [[RET:%.*]] = call i8* @memrchr(i8* nonnull getelementptr inbounds ([6 x i8], [6 x i8]* @a123123, i64 0, i64 0), i32 1, i64 [[N:%.*]]) -; CHECK-NEXT: ret i8* [[RET]] +; CHECK-NEXT: [[RET:%.*]] = call ptr @memrchr(ptr nonnull @a123123, i32 1, i64 [[N:%.*]]) +; CHECK-NEXT: ret ptr [[RET]] ; - %ptr = getelementptr [6 x i8], [6 x i8]* @a123123, i32 0, i32 0 - %ret = call i8* @memrchr(i8* %ptr, i32 1, i64 %n) - ret i8* %ret + %ret = call ptr @memrchr(ptr @a123123, i32 1, i64 %n) + ret ptr %ret } diff --git a/llvm/test/Transforms/InstCombine/memrchr-8.ll b/llvm/test/Transforms/InstCombine/memrchr-8.ll index 96a67bb74cc58..9fab1041291d7 100644 --- a/llvm/test/Transforms/InstCombine/memrchr-8.ll +++ b/llvm/test/Transforms/InstCombine/memrchr-8.ll @@ -7,7 +7,7 @@ ; Folding of equality expressions with the first argument plus the bound ; -1, i.e., memrchr(S, C, N) == N && S[N - 1] == C is not implemented. 
-declare i8* @memrchr(i8*, i32, i64) +declare ptr @memrchr(ptr, i32, i64) @a5 = constant [5 x i8] c"12345"; @@ -19,13 +19,12 @@ declare i8* @memrchr(i8*, i32, i64) define i1 @call_memrchr_a_c_9_eq_a(i32 %c) { ; CHECK-LABEL: @call_memrchr_a_c_9_eq_a( -; CHECK-NEXT: [[Q:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(9) getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0), i32 [[C:%.*]], i64 9) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[Q]], getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0) +; CHECK-NEXT: [[Q:%.*]] = call ptr @memrchr(ptr noundef nonnull dereferenceable(9) @a5, i32 [[C:%.*]], i64 9) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[Q]], @a5 ; CHECK-NEXT: ret i1 [[CMP]] ; - %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0 - %q = call i8* @memrchr(i8* %p, i32 %c, i64 9) - %cmp = icmp eq i8* %q, %p + %q = call ptr @memrchr(ptr @a5, i32 %c, i64 9) + %cmp = icmp eq ptr %q, @a5 ret i1 %cmp } @@ -34,54 +33,53 @@ define i1 @call_memrchr_a_c_9_eq_a(i32 %c) { define i1 @call_memrchr_a_c_n_eq_a(i32 %c, i64 %n) { ; CHECK-LABEL: @call_memrchr_a_c_n_eq_a( -; CHECK-NEXT: [[Q:%.*]] = call i8* @memrchr(i8* nonnull getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0), i32 [[C:%.*]], i64 [[N:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[Q]], getelementptr inbounds ([5 x i8], [5 x i8]* @a5, i64 0, i64 0) +; CHECK-NEXT: [[Q:%.*]] = call ptr @memrchr(ptr nonnull @a5, i32 [[C:%.*]], i64 [[N:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[Q]], @a5 ; CHECK-NEXT: ret i1 [[CMP]] ; - %p = getelementptr [5 x i8], [5 x i8]* @a5, i32 0, i32 0 - %q = call i8* @memrchr(i8* %p, i32 %c, i64 %n) - %cmp = icmp eq i8* %q, %p + %q = call ptr @memrchr(ptr @a5, i32 %c, i64 %n) + %cmp = icmp eq ptr %q, @a5 ret i1 %cmp } ; Do not fold memrchr(s, c, 17). -define i1 @call_memrchr_s_c_17_eq_s(i8* %s, i32 %c) { +define i1 @call_memrchr_s_c_17_eq_s(ptr %s, i32 %c) { ; CHECK-LABEL: @call_memrchr_s_c_17_eq_s( -; CHECK-NEXT: [[P:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(17) [[S:%.*]], i32 [[C:%.*]], i64 17) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[P]], [[S]] +; CHECK-NEXT: [[P:%.*]] = call ptr @memrchr(ptr noundef nonnull dereferenceable(17) [[S:%.*]], i32 [[C:%.*]], i64 17) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], [[S]] ; CHECK-NEXT: ret i1 [[CMP]] ; - %p = call i8* @memrchr(i8* %s, i32 %c, i64 17) - %cmp = icmp eq i8* %p, %s + %p = call ptr @memrchr(ptr %s, i32 %c, i64 17) + %cmp = icmp eq ptr %p, %s ret i1 %cmp } ; Do not fold memrchr(s, c, 9). -define i1 @call_memrchr_s_c_9_neq_s(i8* %s, i32 %c) { +define i1 @call_memrchr_s_c_9_neq_s(ptr %s, i32 %c) { ; CHECK-LABEL: @call_memrchr_s_c_9_neq_s( -; CHECK-NEXT: [[P:%.*]] = call i8* @memrchr(i8* noundef nonnull dereferenceable(7) [[S:%.*]], i32 [[C:%.*]], i64 7) -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8* [[P]], [[S]] +; CHECK-NEXT: [[P:%.*]] = call ptr @memrchr(ptr noundef nonnull dereferenceable(7) [[S:%.*]], i32 [[C:%.*]], i64 7) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne ptr [[P]], [[S]] ; CHECK-NEXT: ret i1 [[CMP]] ; - %p = call i8* @memrchr(i8* %s, i32 %c, i64 7) - %cmp = icmp ne i8* %p, %s + %p = call ptr @memrchr(ptr %s, i32 %c, i64 7) + %cmp = icmp ne ptr %p, %s ret i1 %cmp } ; Do not fold memrchr(s, c, n). 
-define i1 @fold_memrchr_s_c_n_eq_s(i8* %s, i32 %c, i64 %n) { +define i1 @fold_memrchr_s_c_n_eq_s(ptr %s, i32 %c, i64 %n) { ; CHECK-LABEL: @fold_memrchr_s_c_n_eq_s( -; CHECK-NEXT: [[P:%.*]] = call i8* @memrchr(i8* [[S:%.*]], i32 [[C:%.*]], i64 [[N:%.*]]) -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[P]], [[S]] +; CHECK-NEXT: [[P:%.*]] = call ptr @memrchr(ptr [[S:%.*]], i32 [[C:%.*]], i64 [[N:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[P]], [[S]] ; CHECK-NEXT: ret i1 [[CMP]] ; - %p = call i8* @memrchr(i8* %s, i32 %c, i64 %n) - %cmp = icmp eq i8* %p, %s + %p = call ptr @memrchr(ptr %s, i32 %c, i64 %n) + %cmp = icmp eq ptr %p, %s ret i1 %cmp } diff --git a/llvm/test/Transforms/InstCombine/memset_chk-1.ll b/llvm/test/Transforms/InstCombine/memset_chk-1.ll index 2a5952b29ba9c..44b549e400dd8 100644 --- a/llvm/test/Transforms/InstCombine/memset_chk-1.ll +++ b/llvm/test/Transforms/InstCombine/memset_chk-1.ll @@ -12,169 +12,160 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 ; Check cases where dstlen >= len. -define i8* @test_simplify1() { +define ptr @test_simplify1() { ; CHECK-LABEL: @test_simplify1( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false) -; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(1824) @t, i8 0, i64 1824, i1 false) +; CHECK-NEXT: ret ptr @t ; - %dst = bitcast %struct.T* @t to i8* - %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824) - ret i8* %ret + %ret = call ptr @__memset_chk(ptr @t, i32 0, i64 1824, i64 1824) + ret ptr %ret } -define i8* @test_simplify2() { +define ptr @test_simplify2() { ; CHECK-LABEL: @test_simplify2( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false) -; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(1824) @t, i8 0, i64 1824, i1 false) +; CHECK-NEXT: ret ptr @t ; - %dst = bitcast %struct.T* @t to i8* - %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 3648) - ret i8* %ret + %ret = call ptr @__memset_chk(ptr @t, i32 0, i64 1824, i64 3648) + ret ptr %ret } -define i8* @test_simplify3() { +define ptr @test_simplify3() { ; CHECK-LABEL: @test_simplify3( -; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false) -; CHECK-NEXT: ret i8* bitcast (%struct.T* @t to i8*) +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(1824) @t, i8 0, i64 1824, i1 false) +; CHECK-NEXT: ret ptr @t ; - %dst = bitcast %struct.T* @t to i8* - %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 -1) - ret i8* %ret + %ret = call ptr @__memset_chk(ptr @t, i32 0, i64 1824, i64 -1) + ret ptr %ret } ; Same as @test_simplify1 with tail call. 
-define i8* @test_simplify4() {
+define ptr @test_simplify4() {
 ; CHECK-LABEL: @test_simplify4(
-; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false)
-; CHECK-NEXT:    ret i8* bitcast (%struct.T* @t to i8*)
+; CHECK-NEXT:    tail call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(1824) @t, i8 0, i64 1824, i1 false)
+; CHECK-NEXT:    ret ptr @t
 ;
-  %dst = bitcast %struct.T* @t to i8*
-  %ret = tail call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
-  ret i8* %ret
+  %ret = tail call ptr @__memset_chk(ptr @t, i32 0, i64 1824, i64 1824)
+  ret ptr %ret
 }
 
 ; Check cases where dstlen < len.
 
-define i8* @test_no_simplify1() {
+define ptr @test_no_simplify1() {
 ; CHECK-LABEL: @test_no_simplify1(
-; CHECK-NEXT:    [[RET:%.*]] = call i8* @__memset_chk(i8* nonnull bitcast (%struct.T* @t to i8*), i32 0, i64 1824, i64 400)
-; CHECK-NEXT:    ret i8* [[RET]]
+; CHECK-NEXT:    [[RET:%.*]] = call ptr @__memset_chk(ptr nonnull @t, i32 0, i64 1824, i64 400)
+; CHECK-NEXT:    ret ptr [[RET]]
 ;
-  %dst = bitcast %struct.T* @t to i8*
-  %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 400)
-  ret i8* %ret
+  %ret = call ptr @__memset_chk(ptr @t, i32 0, i64 1824, i64 400)
+  ret ptr %ret
 }
 
-define i8* @test_no_simplify2() {
+define ptr @test_no_simplify2() {
 ; CHECK-LABEL: @test_no_simplify2(
-; CHECK-NEXT:    [[RET:%.*]] = call i8* @__memset_chk(i8* nonnull bitcast (%struct.T* @t to i8*), i32 0, i64 1824, i64 0)
-; CHECK-NEXT:    ret i8* [[RET]]
+; CHECK-NEXT:    [[RET:%.*]] = call ptr @__memset_chk(ptr nonnull @t, i32 0, i64 1824, i64 0)
+; CHECK-NEXT:    ret ptr [[RET]]
 ;
-  %dst = bitcast %struct.T* @t to i8*
-  %ret = call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 0)
-  ret i8* %ret
+  %ret = call ptr @__memset_chk(ptr @t, i32 0, i64 1824, i64 0)
+  ret ptr %ret
 }
 
-define i8* @test_no_simplify3(i8* %dst, i32 %a, i64 %b, i64 %c) {
+define ptr @test_no_simplify3(ptr %dst, i32 %a, i64 %b, i64 %c) {
 ; CHECK-LABEL: @test_no_simplify3(
-; CHECK-NEXT:    [[RET:%.*]] = musttail call i8* @__memset_chk(i8* [[DST:%.*]], i32 0, i64 1824, i64 1824)
-; CHECK-NEXT:    ret i8* [[RET]]
+; CHECK-NEXT:    [[RET:%.*]] = musttail call ptr @__memset_chk(ptr [[DST:%.*]], i32 0, i64 1824, i64 1824)
+; CHECK-NEXT:    ret ptr [[RET]]
 ;
-  %ret = musttail call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
-  ret i8* %ret
+  %ret = musttail call ptr @__memset_chk(ptr %dst, i32 0, i64 1824, i64 1824)
+  ret ptr %ret
 }
 
 ; Test that RAUW in SimplifyLibCalls for __memset_chk generates valid IR
-define i32 @test_rauw(i8* %a, i8* %b, i8** %c) {
+define i32 @test_rauw(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: @test_rauw(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL49:%.*]] = call i64 @strlen(i8* noundef nonnull dereferenceable(1) [[A:%.*]])
+; CHECK-NEXT:    [[CALL49:%.*]] = call i64 @strlen(ptr noundef nonnull dereferenceable(1) [[A:%.*]])
 ; CHECK-NEXT:    [[ADD180:%.*]] = add i64 [[CALL49]], 1
-; CHECK-NEXT:    [[YO107:%.*]] = call i64 @llvm.objectsize.i64.p0i8(i8* [[B:%.*]], i1 false, i1 false, i1 false)
-; CHECK-NEXT:    [[CALL50:%.*]] = call i8* @__memmove_chk(i8* [[B]], i8* [[A]], i64 [[ADD180]], i64 [[YO107]])
-; CHECK-NEXT:    [[STRLEN:%.*]] = call i64 @strlen(i8* noundef nonnull dereferenceable(1) [[B]])
-; CHECK-NEXT:    [[STRCHR1:%.*]] = getelementptr inbounds i8, i8* [[B]], i64 [[STRLEN]]
-; CHECK-NEXT:    [[D:%.*]] = load i8*, i8** [[C:%.*]], align 8
-; CHECK-NEXT:    [[SUB182:%.*]] = ptrtoint i8* [[D]] to i64
-; CHECK-NEXT:    [[SUB183:%.*]] = ptrtoint i8* [[B]] to i64
+; CHECK-NEXT:    [[YO107:%.*]] = call i64 @llvm.objectsize.i64.p0(ptr [[B:%.*]], i1 false, i1 false, i1 false)
+; CHECK-NEXT:    [[CALL50:%.*]] = call ptr @__memmove_chk(ptr [[B]], ptr [[A]], i64 [[ADD180]], i64 [[YO107]])
+; CHECK-NEXT:    [[STRLEN:%.*]] = call i64 @strlen(ptr noundef nonnull dereferenceable(1) [[B]])
+; CHECK-NEXT:    [[STRCHR1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[STRLEN]]
+; CHECK-NEXT:    [[D:%.*]] = load ptr, ptr [[C:%.*]], align 8
+; CHECK-NEXT:    [[SUB182:%.*]] = ptrtoint ptr [[D]] to i64
+; CHECK-NEXT:    [[SUB183:%.*]] = ptrtoint ptr [[B]] to i64
 ; CHECK-NEXT:    [[SUB184:%.*]] = sub i64 [[SUB182]], [[SUB183]]
 ; CHECK-NEXT:    [[ADD52_I_I:%.*]] = add nsw i64 [[SUB184]], 1
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 1 [[STRCHR1]], i8 0, i64 [[ADD52_I_I]], i1 false)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 1 [[STRCHR1]], i8 0, i64 [[ADD52_I_I]], i1 false)
 ; CHECK-NEXT:    ret i32 4
 ;
 entry:
-  %call49 = call i64 @strlen(i8* %a)
+  %call49 = call i64 @strlen(ptr %a)
   %add180 = add i64 %call49, 1
-  %yo107 = call i64 @llvm.objectsize.i64.p0i8(i8* %b, i1 false, i1 false, i1 false)
-  %call50 = call i8* @__memmove_chk(i8* %b, i8* %a, i64 %add180, i64 %yo107)
-  %call51i = call i8* @strrchr(i8* %b, i32 0)
-  %d = load i8*, i8** %c, align 8
-  %sub182 = ptrtoint i8* %d to i64
-  %sub183 = ptrtoint i8* %b to i64
+  %yo107 = call i64 @llvm.objectsize.i64.p0(ptr %b, i1 false, i1 false, i1 false)
+  %call50 = call ptr @__memmove_chk(ptr %b, ptr %a, i64 %add180, i64 %yo107)
+  %call51i = call ptr @strrchr(ptr %b, i32 0)
+  %d = load ptr, ptr %c, align 8
+  %sub182 = ptrtoint ptr %d to i64
+  %sub183 = ptrtoint ptr %b to i64
   %sub184 = sub i64 %sub182, %sub183
   %add52.i.i = add nsw i64 %sub184, 1
-  %call185 = call i8* @__memset_chk(i8* %call51i, i32 0, i64 %add52.i.i, i64 -1)
+  %call185 = call ptr @__memset_chk(ptr %call51i, i32 0, i64 %add52.i.i, i64 -1)
   ret i32 4
 }
 
-declare i8* @__memmove_chk(i8*, i8*, i64, i64)
-declare i8* @strrchr(i8*, i32)
-declare i64 @strlen(i8* nocapture)
-declare i64 @llvm.objectsize.i64.p0i8(i8*, i1, i1, i1)
+declare ptr @__memmove_chk(ptr, ptr, i64, i64)
+declare ptr @strrchr(ptr, i32)
+declare i64 @strlen(ptr nocapture)
+declare i64 @llvm.objectsize.i64.p0(ptr, i1, i1, i1)
 
-declare i8* @__memset_chk(i8*, i32, i64, i64)
+declare ptr @__memset_chk(ptr, i32, i64, i64)
 
 ; FIXME: memset(malloc(x), 0, x) -> calloc(1, x)
-define float* @pr25892(i64 %size) #0 {
+define ptr @pr25892(i64 %size) #0 {
 ; CHECK-LABEL: @pr25892(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @malloc(i64 [[SIZE:%.*]]) #[[ATTR3:[0-9]+]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8* [[CALL]], null
+; CHECK-NEXT:    [[CALL:%.*]] = tail call ptr @malloc(i64 [[SIZE:%.*]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[CALL]], null
 ; CHECK-NEXT:    br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[BC:%.*]] = bitcast i8* [[CALL]] to float*
-; CHECK-NEXT:    [[CALL2:%.*]] = tail call i64 @llvm.objectsize.i64.p0i8(i8* nonnull [[CALL]], i1 false, i1 false, i1 false)
-; CHECK-NEXT:    [[CALL3:%.*]] = tail call i8* @__memset_chk(i8* nonnull [[CALL]], i32 0, i64 [[SIZE]], i64 [[CALL2]]) #[[ATTR3]]
+; CHECK-NEXT:    [[CALL2:%.*]] = tail call i64 @llvm.objectsize.i64.p0(ptr nonnull [[CALL]], i1 false, i1 false, i1 false)
+; CHECK-NEXT:    [[CALL3:%.*]] = tail call ptr @__memset_chk(ptr nonnull [[CALL]], i32 0, i64 [[SIZE]], i64 [[CALL2]]) #[[ATTR3]]
 ; CHECK-NEXT:    br label [[CLEANUP]]
 ; CHECK:       cleanup:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi float* [ [[BC]], [[IF_END]] ], [ null, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    ret float* [[RETVAL_0]]
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi ptr [ [[CALL]], [[IF_END]] ], [ null, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret ptr [[RETVAL_0]]
 ;
 entry:
-  %call = tail call i8* @malloc(i64 %size) #1
-  %cmp = icmp eq i8* %call, null
+  %call = tail call ptr @malloc(i64 %size) #1
+  %cmp = icmp eq ptr %call, null
   br i1 %cmp, label %cleanup, label %if.end
 if.end:
-  %bc = bitcast i8* %call to float*
-  %call2 = tail call i64 @llvm.objectsize.i64.p0i8(i8* nonnull %call, i1 false, i1 false, i1 false)
-  %call3 = tail call i8* @__memset_chk(i8* nonnull %call, i32 0, i64 %size, i64 %call2) #1
+  %call2 = tail call i64 @llvm.objectsize.i64.p0(ptr nonnull %call, i1 false, i1 false, i1 false)
+  %call3 = tail call ptr @__memset_chk(ptr nonnull %call, i32 0, i64 %size, i64 %call2) #1
   br label %cleanup
 cleanup:
-  %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ]
-  ret float* %retval.0
+  %retval.0 = phi ptr [ %call, %if.end ], [ null, %entry ]
+  ret ptr %retval.0
 }
 
-define i8* @test_no_incompatible_attr(i8* %mem, i32 %val, i32 %size) {
+define ptr @test_no_incompatible_attr(ptr %mem, i32 %val, i32 %size) {
 ; CHECK-LABEL: @test_no_incompatible_attr(
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(1824) bitcast (%struct.T* @t to i8*), i8 0, i64 1824, i1 false)
-; CHECK-NEXT:    ret i8* bitcast (%struct.T* @t to i8*)
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(1824) @t, i8 0, i64 1824, i1 false)
+; CHECK-NEXT:    ret ptr @t
 ;
-  %dst = bitcast %struct.T* @t to i8*
-  %ret = call dereferenceable(1) i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
-  ret i8* %ret
+  %ret = call dereferenceable(1) ptr @__memset_chk(ptr @t, i32 0, i64 1824, i64 1824)
+  ret ptr %ret
 }
 
-declare noalias i8* @malloc(i64) #1
+declare noalias ptr @malloc(i64) #1
 
 attributes #0 = { nounwind ssp uwtable }
 attributes #1 = { nounwind }
diff --git a/llvm/test/Transforms/InstCombine/mul_full_64.ll b/llvm/test/Transforms/InstCombine/mul_full_64.ll
index 6abdcd56eb22c..ed1afeea3f999 100644
--- a/llvm/test/Transforms/InstCombine/mul_full_64.ll
+++ b/llvm/test/Transforms/InstCombine/mul_full_64.ll
@@ -87,7 +87,7 @@ define { i64, i64 } @mul_full_64_variant0(i64 %x, i64 %y) {
 ; #endif
 ; }
 
-define i64 @mul_full_64_variant1(i64 %a, i64 %b, i64* nocapture %rhi) {
+define i64 @mul_full_64_variant1(i64 %a, i64 %b, ptr nocapture %rhi) {
 ; CHECK-LABEL: @mul_full_64_variant1(
 ; CHECK-NEXT:    [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295
 ; CHECK-NEXT:    [[SHR_I43:%.*]] = lshr i64 [[A]], 32
@@ -105,7 +105,7 @@ define i64 @mul_full_64_variant1(i64 %a, i64 %b, i64* nocapture %rhi) {
 ; CHECK-NEXT:    [[ADD15:%.*]] = add i64 [[CONV14]], [[MUL6]]
 ; CHECK-NEXT:    [[SHR_I:%.*]] = lshr i64 [[ADD15]], 32
 ; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I]]
-; CHECK-NEXT:    store i64 [[ADD17]], i64* [[RHI:%.*]], align 8
+; CHECK-NEXT:    store i64 [[ADD17]], ptr [[RHI:%.*]], align 8
 ; CHECK-NEXT:    [[MULLO:%.*]] = mul i64 [[B]], [[A]]
 ; CHECK-NEXT:    ret i64 [[MULLO]]
 ;
@@ -125,12 +125,12 @@ define i64 @mul_full_64_variant1(i64 %a, i64 %b, i64* nocapture %rhi) {
   %add15 = add i64 %conv14, %mul6
   %shr.i = lshr i64 %add15, 32
   %add17 = add i64 %add10, %shr.i
-  store i64 %add17, i64* %rhi, align 8
+  store i64 %add17, ptr %rhi, align 8
   %mullo = mul i64 %b, %a
   ret i64 %mullo
 }
 
-define i64 @mul_full_64_variant2(i64 %a, i64 %b, i64* nocapture %rhi) {
+define i64 @mul_full_64_variant2(i64 %a, i64 %b, ptr nocapture %rhi) {
 ; CHECK-LABEL: @mul_full_64_variant2(
 ; CHECK-NEXT:    [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295
 ; CHECK-NEXT:    [[SHR_I58:%.*]] = lshr i64 [[A]], 32
@@ -148,7 +148,7 @@ define i64 @mul_full_64_variant2(i64 %a, i64 %b, i64* nocapture %rhi) {
 ; CHECK-NEXT:    [[ADD15:%.*]] = add i64 [[CONV14]], [[MUL6]]
 ; CHECK-NEXT:    [[SHR_I51:%.*]] = lshr i64 [[ADD15]], 32
 ; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I51]]
-; CHECK-NEXT:    store i64 [[ADD17]], i64* [[RHI:%.*]], align 8
+; CHECK-NEXT:    store i64 [[ADD17]], ptr [[RHI:%.*]], align 8
 ; CHECK-NEXT:    [[CONV24:%.*]] = shl i64 [[ADD15]], 32
 ; CHECK-NEXT:    [[CONV26:%.*]] = and i64 [[MUL7]], 4294967295
 ; CHECK-NEXT:    [[ADD27:%.*]] = or i64 [[CONV24]], [[CONV26]]
@@ -170,14 +170,14 @@ define i64 @mul_full_64_variant2(i64 %a, i64 %b, i64* nocapture %rhi) {
   %add15 = add i64 %conv14, %mul6
   %shr.i51 = lshr i64 %add15, 32
   %add17 = add i64 %add10, %shr.i51
-  store i64 %add17, i64* %rhi, align 8
+  store i64 %add17, ptr %rhi, align 8
   %conv24 = shl i64 %add15, 32
   %conv26 = and i64 %mul7, 4294967295
   %add27 = or i64 %conv24, %conv26
   ret i64 %add27
 }
 
-define i64 @mul_full_64_variant3(i64 %a, i64 %b, i64* nocapture %rhi) {
+define i64 @mul_full_64_variant3(i64 %a, i64 %b, ptr nocapture %rhi) {
 ; CHECK-LABEL: @mul_full_64_variant3(
 ; CHECK-NEXT:    [[CONV:%.*]] = and i64 [[A:%.*]], 4294967295
 ; CHECK-NEXT:    [[SHR_I45:%.*]] = lshr i64 [[A]], 32
@@ -195,7 +195,7 @@ define i64 @mul_full_64_variant3(i64 %a, i64 %b, i64* nocapture %rhi) {
 ; CHECK-NEXT:    [[ADD15:%.*]] = add i64 [[CONV14]], [[MUL6]]
 ; CHECK-NEXT:    [[SHR_I:%.*]] = lshr i64 [[ADD15]], 32
 ; CHECK-NEXT:    [[ADD17:%.*]] = add i64 [[ADD10]], [[SHR_I]]
-; CHECK-NEXT:    store i64 [[ADD17]], i64* [[RHI:%.*]], align 8
+; CHECK-NEXT:    store i64 [[ADD17]], ptr [[RHI:%.*]], align 8
 ; CHECK-NEXT:    [[ADD19:%.*]] = mul i64 [[A]], [[B]]
 ; CHECK-NEXT:    ret i64 [[ADD19]]
 ;
@@ -215,7 +215,7 @@ define i64 @mul_full_64_variant3(i64 %a, i64 %b, i64* nocapture %rhi) {
   %add15 = add i64 %conv14, %mul6
   %shr.i = lshr i64 %add15, 32
   %add17 = add i64 %add10, %shr.i
-  store i64 %add17, i64* %rhi, align 8
+  store i64 %add17, ptr %rhi, align 8
   %add18 = add i64 %mul6, %mul5
   %shl = shl i64 %add18, 32
   %add19 = add i64 %shl, %mul7
diff --git a/llvm/test/Transforms/InstCombine/objsize.ll b/llvm/test/Transforms/InstCombine/objsize.ll
index 72497a6881b51..33c14f44fc5fb 100644
--- a/llvm/test/Transforms/InstCombine/objsize.ll
+++ b/llvm/test/Transforms/InstCombine/objsize.ll
@@ -4,47 +4,47 @@
 ; We need target data to get the sizes of the arrays and structures.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -@a = private global [60 x i8] zeroinitializer, align 1 ; <[60 x i8]*> -@.str = private constant [8 x i8] c"abcdefg\00" ; <[8 x i8]*> +@a = private global [60 x i8] zeroinitializer, align 1 ; +@.str = private constant [8 x i8] c"abcdefg\00" ; define i32 @foo() nounwind { ; CHECK-LABEL: @foo( ; CHECK-NEXT: ret i32 60 ; - %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false, i1 false, i1 false) + %1 = call i32 @llvm.objectsize.i32.p0(ptr @a, i1 false, i1 false, i1 false) ret i32 %1 } -define i8* @bar() nounwind { +define ptr @bar() nounwind { ; CHECK-LABEL: @bar( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RETVAL:%.*]] = alloca i8*, align 4 +; CHECK-NEXT: [[RETVAL:%.*]] = alloca ptr, align 4 ; CHECK-NEXT: br i1 true, label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] ; CHECK: cond.true: -; CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** [[RETVAL]], align 4 -; CHECK-NEXT: ret i8* [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[RETVAL]], align 4 +; CHECK-NEXT: ret ptr [[TMP0]] ; CHECK: cond.false: -; CHECK-NEXT: ret i8* poison +; CHECK-NEXT: ret ptr poison ; entry: - %retval = alloca i8* - %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @a, i32 0, i32 0), i1 false, i1 false, i1 false) + %retval = alloca ptr + %0 = call i32 @llvm.objectsize.i32.p0(ptr @a, i1 false, i1 false, i1 false) %cmp = icmp ne i32 %0, -1 br i1 %cmp, label %cond.true, label %cond.false cond.true: - %1 = load i8*, i8** %retval - ret i8* %1 + %1 = load ptr, ptr %retval + ret ptr %1 cond.false: - %2 = load i8*, i8** %retval - ret i8* %2 + %2 = load ptr, ptr %retval + ret ptr %2 } define i32 @f() nounwind { ; CHECK-LABEL: @f( ; CHECK-NEXT: ret i32 0 ; - %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr ([60 x i8], [60 x i8]* @a, i32 1, i32 0), i1 false, i1 false, i1 false) + %1 = call i32 @llvm.objectsize.i32.p0(ptr getelementptr ([60 x i8], ptr @a, i32 1, i32 0), i1 false, i1 false, i1 false) ret i32 %1 } @@ -52,19 +52,19 @@ define i32 @f() nounwind { define i1 @baz() nounwind { ; CHECK-LABEL: @baz( -; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8], [0 x i8]* @window, i32 0, i32 0), i1 false, i1 false, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.objectsize.i32.p0(ptr @window, i1 false, i1 false, i1 false) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], -1 ; CHECK-NEXT: ret i1 [[TMP2]] ; - %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8], [0 x i8]* @window, i32 0, i32 0), i1 false, i1 false, i1 false) + %1 = tail call i32 @llvm.objectsize.i32.p0(ptr @window, i1 false, i1 false, i1 false) %2 = icmp eq i32 %1, -1 ret i1 %2 } -define void @test1(i8* %q, i32 %x) nounwind noinline { +define void @test1(ptr %q, i32 %x) nounwind noinline { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8], [0 x i8]* @window, i32 0, i32 10), i1 false, i1 false, i1 false) +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr getelementptr inbounds ([0 x i8], ptr @window, i32 0, i32 10), i1 false, i1 false, i1 false) ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], -1 ; CHECK-NEXT: br i1 [[TMP1]], label %"47", label %"46" ; CHECK: "46": @@ -73,7 +73,7 @@ define void @test1(i8* %q, i32 %x) 
nounwind noinline { ; CHECK-NEXT: unreachable ; entry: - %0 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([0 x i8], [0 x i8]* @window, i32 0, i32 10), i1 false, i1 false, i1 false) ; [#uses=1] + %0 = call i32 @llvm.objectsize.i32.p0(ptr getelementptr inbounds ([0 x i8], ptr @window, i32 0, i32 10), i1 false, i1 false, i1 false) ; [#uses=1] %1 = icmp eq i32 %0, -1 ; [#uses=1] br i1 %1, label %"47", label %"46" @@ -90,45 +90,43 @@ define i32 @test2() nounwind { ; CHECK-LABEL: @test2( ; CHECK-NEXT: ret i32 34 ; - %1 = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr (i8, i8* bitcast ([9 x i32]* @.str5 to i8*), i32 2), i1 false, i1 false, i1 false) + %1 = call i32 @llvm.objectsize.i32.p0(ptr getelementptr (i8, ptr @.str5, i32 2), i1 false, i1 false, i1 false) ret i32 %1 } ; rdar://7674946 -@array = internal global [480 x float] zeroinitializer ; <[480 x float]*> [#uses=1] +@array = internal global [480 x float] zeroinitializer ; [#uses=1] -declare i8* @__memcpy_chk(i8*, i8*, i32, i32) nounwind +declare ptr @__memcpy_chk(ptr, ptr, i32, i32) nounwind -declare i32 @llvm.objectsize.i32.p0i8(i8*, i1, i1, i1) nounwind readonly +declare i32 @llvm.objectsize.i32.p0(ptr, i1, i1, i1) nounwind readonly -declare i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)*, i1, i1, i1) nounwind readonly +declare i32 @llvm.objectsize.i32.p1(ptr addrspace(1), i1, i1, i1) nounwind readonly -declare i8* @__inline_memcpy_chk(i8*, i8*, i32) nounwind inlinehint +declare ptr @__inline_memcpy_chk(ptr, ptr, i32) nounwind inlinehint -define void @test3(i1 %c1, i8* %ptr1, i8* %ptr2, i8* %ptr3) nounwind { +define void @test3(i1 %c1, ptr %ptr1, ptr %ptr2, ptr %ptr3) nounwind { ; CHECK-LABEL: @test3( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 [[C1:%.*]], label [[BB11:%.*]], label [[BB12:%.*]] ; CHECK: bb11: ; CHECK-NEXT: unreachable ; CHECK: bb12: -; CHECK-NEXT: [[TMP0:%.*]] = call i8* @__inline_memcpy_chk(i8* nonnull bitcast (float* getelementptr inbounds ([480 x float], [480 x float]* @array, i32 0, i32 1) to i8*), i8* [[PTR3:%.*]], i32 512) #[[ATTR3:[0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = call ptr @__inline_memcpy_chk(ptr nonnull getelementptr inbounds ([480 x float], ptr @array, i32 0, i32 1), ptr [[PTR3:%.*]], i32 512) #[[ATTR3:[0-9]+]] ; CHECK-NEXT: unreachable ; entry: br i1 %c1, label %bb11, label %bb12 bb11: - %0 = getelementptr inbounds float, float* getelementptr inbounds ([480 x float], [480 x float]* @array, i32 0, i32 128), i32 -127 ; [#uses=1] - %1 = bitcast float* %0 to i8* ; [#uses=1] - %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false, i1 false, i1 false) ; [#uses=1] - %3 = call i8* @__memcpy_chk(i8* %ptr1, i8* %ptr2, i32 512, i32 %2) nounwind ; [#uses=0] + %0 = getelementptr inbounds float, ptr getelementptr inbounds ([480 x float], ptr @array, i32 0, i32 128), i32 -127 ; [#uses=1] + %1 = call i32 @llvm.objectsize.i32.p0(ptr %0, i1 false, i1 false, i1 false) ; [#uses=1] + %2 = call ptr @__memcpy_chk(ptr %ptr1, ptr %ptr2, i32 512, i32 %1) nounwind ; [#uses=0] unreachable bb12: - %4 = getelementptr inbounds float, float* getelementptr inbounds ([480 x float], [480 x float]* @array, i32 0, i32 128), i32 -127 ; [#uses=1] - %5 = bitcast float* %4 to i8* ; [#uses=1] - %6 = call i8* @__inline_memcpy_chk(i8* %5, i8* %ptr3, i32 512) nounwind inlinehint ; [#uses=0] + %3 = getelementptr inbounds float, ptr getelementptr inbounds ([480 x float], ptr @array, i32 0, i32 128), i32 -127 ; [#uses=1] + %4 = call ptr @__inline_memcpy_chk(ptr %3, ptr %ptr3, i32 512) nounwind inlinehint ; [#uses=0] unreachable 
} @@ -136,175 +134,170 @@ bb12: %struct.data = type { [100 x i32], [100 x i32], [1024 x i8] } -define i32 @test4(i8** %esc) nounwind ssp { +define i32 @test4(ptr %esc) nounwind ssp { ; CHECK-LABEL: @test4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = alloca [[STRUCT_DATA:%.*]], align 8 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.data* [[TMP0]] to i8* -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* noundef nonnull align 8 dereferenceable(1824) [[TMP1]], i8 0, i32 1824, i1 false) #[[ATTR0:[0-9]+]] -; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8** [[ESC:%.*]] to %struct.data** -; CHECK-NEXT: store %struct.data* [[TMP0]], %struct.data** [[TMP2]], align 4 +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr noundef nonnull align 8 dereferenceable(1824) [[TMP0]], i8 0, i32 1824, i1 false) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: store ptr [[TMP0]], ptr [[ESC:%.*]], align 4 ; CHECK-NEXT: ret i32 0 ; entry: %0 = alloca %struct.data, align 8 - %1 = bitcast %struct.data* %0 to i8* - %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false, i1 false, i1 false) nounwind - %3 = call i8* @__memset_chk(i8* %1, i32 0, i32 1824, i32 %2) nounwind - store i8* %1, i8** %esc + %1 = call i32 @llvm.objectsize.i32.p0(ptr %0, i1 false, i1 false, i1 false) nounwind + %2 = call ptr @__memset_chk(ptr %0, i32 0, i32 1824, i32 %1) nounwind + store ptr %0, ptr %esc ret i32 0 } ; rdar://7782496 -@s = external global i8* +@s = external global ptr -define i8* @test5(i32 %n) nounwind ssp { +define ptr @test5(i32 %n) nounwind ssp { ; CHECK-LABEL: @test5( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = tail call noalias dereferenceable_or_null(20) i8* @malloc(i32 20) #[[ATTR0]] -; CHECK-NEXT: [[TMP1:%.*]] = load i8*, i8** @s, align 8 -; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* noundef nonnull align 1 dereferenceable(10) [[TMP0]], i8* noundef nonnull align 1 dereferenceable(10) [[TMP1]], i32 10, i1 false) #[[ATTR0]] -; CHECK-NEXT: ret i8* [[TMP0]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call noalias dereferenceable_or_null(20) ptr @malloc(i32 20) #[[ATTR0]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @s, align 8 +; CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i32(ptr noundef nonnull align 1 dereferenceable(10) [[TMP0]], ptr noundef nonnull align 1 dereferenceable(10) [[TMP1]], i32 10, i1 false) #[[ATTR0]] +; CHECK-NEXT: ret ptr [[TMP0]] ; entry: - %0 = tail call noalias i8* @malloc(i32 20) nounwind - %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false, i1 false, i1 false) - %2 = load i8*, i8** @s, align 8 - %3 = tail call i8* @__memcpy_chk(i8* %0, i8* %2, i32 10, i32 %1) nounwind - ret i8* %0 + %0 = tail call noalias ptr @malloc(i32 20) nounwind + %1 = tail call i32 @llvm.objectsize.i32.p0(ptr %0, i1 false, i1 false, i1 false) + %2 = load ptr, ptr @s, align 8 + %3 = tail call ptr @__memcpy_chk(ptr %0, ptr %2, i32 10, i32 %1) nounwind + ret ptr %0 } define void @test6(i32 %n) nounwind ssp { ; CHECK-LABEL: @test6( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = tail call noalias dereferenceable_or_null(20) i8* @malloc(i32 20) #[[ATTR0]] -; CHECK-NEXT: [[TMP1:%.*]] = load i8*, i8** @s, align 8 -; CHECK-NEXT: [[TMP2:%.*]] = tail call i8* @__memcpy_chk(i8* [[TMP0]], i8* [[TMP1]], i32 30, i32 20) #[[ATTR0]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call noalias dereferenceable_or_null(20) ptr @malloc(i32 20) #[[ATTR0]] +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @s, align 8 +; CHECK-NEXT: [[TMP2:%.*]] = tail call ptr @__memcpy_chk(ptr [[TMP0]], ptr [[TMP1]], i32 30, i32 20) #[[ATTR0]] ; CHECK-NEXT: ret void ; entry: - 
%0 = tail call noalias i8* @malloc(i32 20) nounwind - %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %0, i1 false, i1 false, i1 false) - %2 = load i8*, i8** @s, align 8 - %3 = tail call i8* @__memcpy_chk(i8* %0, i8* %2, i32 30, i32 %1) nounwind + %0 = tail call noalias ptr @malloc(i32 20) nounwind + %1 = tail call i32 @llvm.objectsize.i32.p0(ptr %0, i1 false, i1 false, i1 false) + %2 = load ptr, ptr @s, align 8 + %3 = tail call ptr @__memcpy_chk(ptr %0, ptr %2, i32 30, i32 %1) nounwind ret void } -declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind +declare ptr @__memset_chk(ptr, i32, i32, i32) nounwind -declare noalias i8* @malloc(i32) nounwind allockind("alloc,uninitialized") allocsize(0) +declare noalias ptr @malloc(i32) nounwind allockind("alloc,uninitialized") allocsize(0) -define i32 @test7(i8** %esc) { +define i32 @test7(ptr %esc) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: [[ALLOC:%.*]] = call noalias dereferenceable_or_null(48) i8* @malloc(i32 48) #[[ATTR0]] -; CHECK-NEXT: store i8* [[ALLOC]], i8** [[ESC:%.*]], align 4 +; CHECK-NEXT: [[ALLOC:%.*]] = call noalias dereferenceable_or_null(48) ptr @malloc(i32 48) #[[ATTR0]] +; CHECK-NEXT: store ptr [[ALLOC]], ptr [[ESC:%.*]], align 4 ; CHECK-NEXT: ret i32 32 ; - %alloc = call noalias i8* @malloc(i32 48) nounwind - store i8* %alloc, i8** %esc - %gep = getelementptr inbounds i8, i8* %alloc, i32 16 - %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false, i1 false, i1 false) nounwind readonly + %alloc = call noalias ptr @malloc(i32 48) nounwind + store ptr %alloc, ptr %esc + %gep = getelementptr inbounds i8, ptr %alloc, i32 16 + %objsize = call i32 @llvm.objectsize.i32.p0(ptr %gep, i1 false, i1 false, i1 false) nounwind readonly ret i32 %objsize } -declare noalias i8* @calloc(i32, i32) nounwind allockind("alloc,zeroed") allocsize(0,1) +declare noalias ptr @calloc(i32, i32) nounwind allockind("alloc,zeroed") allocsize(0,1) -define i32 @test8(i8** %esc) { +define i32 @test8(ptr %esc) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[ALLOC:%.*]] = call noalias dereferenceable_or_null(35) i8* @calloc(i32 5, i32 7) #[[ATTR0]] -; CHECK-NEXT: store i8* [[ALLOC]], i8** [[ESC:%.*]], align 4 +; CHECK-NEXT: [[ALLOC:%.*]] = call noalias dereferenceable_or_null(35) ptr @calloc(i32 5, i32 7) #[[ATTR0]] +; CHECK-NEXT: store ptr [[ALLOC]], ptr [[ESC:%.*]], align 4 ; CHECK-NEXT: ret i32 30 ; - %alloc = call noalias i8* @calloc(i32 5, i32 7) nounwind - store i8* %alloc, i8** %esc - %gep = getelementptr inbounds i8, i8* %alloc, i32 5 - %objsize = call i32 @llvm.objectsize.i32.p0i8(i8* %gep, i1 false, i1 false, i1 false) nounwind readonly + %alloc = call noalias ptr @calloc(i32 5, i32 7) nounwind + store ptr %alloc, ptr %esc + %gep = getelementptr inbounds i8, ptr %alloc, i32 5 + %objsize = call i32 @llvm.objectsize.i32.p0(ptr %gep, i1 false, i1 false, i1 false) nounwind readonly ret i32 %objsize } -declare noalias i8* @strdup(i8* nocapture) nounwind -declare noalias i8* @strndup(i8* nocapture, i32) nounwind +declare noalias ptr @strdup(ptr nocapture) nounwind +declare noalias ptr @strndup(ptr nocapture, i32) nounwind -define i32 @test9(i8** %esc) { +define i32 @test9(ptr %esc) { ; CHECK-LABEL: @test9( -; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* nonnull getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0)) #[[ATTR0]] -; CHECK-NEXT: store i8* [[CALL]], i8** [[ESC:%.*]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(8) ptr @strdup(ptr nonnull @.str) #[[ATTR0]] +; CHECK-NEXT: store 
ptr [[CALL]], ptr [[ESC:%.*]], align 8 ; CHECK-NEXT: ret i32 8 ; - %call = tail call i8* @strdup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0)) nounwind - store i8* %call, i8** %esc, align 8 - %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true, i1 false, i1 false) + %call = tail call ptr @strdup(ptr @.str) nounwind + store ptr %call, ptr %esc, align 8 + %1 = tail call i32 @llvm.objectsize.i32.p0(ptr %call, i1 true, i1 false, i1 false) ret i32 %1 } -define i32 @test10(i8** %esc) { +define i32 @test10(ptr %esc) { ; CHECK-LABEL: @test10( -; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(4) i8* @strndup(i8* nonnull dereferenceable(8) getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0), i32 3) #[[ATTR0]] -; CHECK-NEXT: store i8* [[CALL]], i8** [[ESC:%.*]], align 8 +; CHECK-NEXT: [[CALL:%.*]] = tail call dereferenceable_or_null(4) ptr @strndup(ptr nonnull dereferenceable(8) @.str, i32 3) #[[ATTR0]] +; CHECK-NEXT: store ptr [[CALL]], ptr [[ESC:%.*]], align 8 ; CHECK-NEXT: ret i32 4 ; - %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i32 3) nounwind - store i8* %call, i8** %esc, align 8 - %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true, i1 false, i1 false) + %call = tail call ptr @strndup(ptr @.str, i32 3) nounwind + store ptr %call, ptr %esc, align 8 + %1 = tail call i32 @llvm.objectsize.i32.p0(ptr %call, i1 true, i1 false, i1 false) ret i32 %1 } -define i32 @test11(i8** %esc) { +define i32 @test11(ptr %esc) { ; CHECK-LABEL: @test11( -; CHECK-NEXT: [[STRDUP:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* nonnull getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0)) -; CHECK-NEXT: store i8* [[STRDUP]], i8** [[ESC:%.*]], align 8 +; CHECK-NEXT: [[STRDUP:%.*]] = tail call dereferenceable_or_null(8) ptr @strdup(ptr nonnull @.str) +; CHECK-NEXT: store ptr [[STRDUP]], ptr [[ESC:%.*]], align 8 ; CHECK-NEXT: ret i32 8 ; - %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i32 7) nounwind - store i8* %call, i8** %esc, align 8 - %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true, i1 false, i1 false) + %call = tail call ptr @strndup(ptr @.str, i32 7) nounwind + store ptr %call, ptr %esc, align 8 + %1 = tail call i32 @llvm.objectsize.i32.p0(ptr %call, i1 true, i1 false, i1 false) ret i32 %1 } -define i32 @test12(i8** %esc) { +define i32 @test12(ptr %esc) { ; CHECK-LABEL: @test12( -; CHECK-NEXT: [[STRDUP:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* nonnull getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0)) -; CHECK-NEXT: store i8* [[STRDUP]], i8** [[ESC:%.*]], align 8 +; CHECK-NEXT: [[STRDUP:%.*]] = tail call dereferenceable_or_null(8) ptr @strdup(ptr nonnull @.str) +; CHECK-NEXT: store ptr [[STRDUP]], ptr [[ESC:%.*]], align 8 ; CHECK-NEXT: ret i32 8 ; - %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i32 8) nounwind - store i8* %call, i8** %esc, align 8 - %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true, i1 false, i1 false) + %call = tail call ptr @strndup(ptr @.str, i32 8) nounwind + store ptr %call, ptr %esc, align 8 + %1 = tail call i32 @llvm.objectsize.i32.p0(ptr %call, i1 true, i1 false, i1 false) ret i32 %1 } -define i32 @test13(i8** %esc) { +define i32 @test13(ptr %esc) { ; CHECK-LABEL: @test13( -; CHECK-NEXT: [[STRDUP:%.*]] = tail call dereferenceable_or_null(8) i8* @strdup(i8* nonnull 
getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i32 0, i32 0)) -; CHECK-NEXT: store i8* [[STRDUP]], i8** [[ESC:%.*]], align 8 +; CHECK-NEXT: [[STRDUP:%.*]] = tail call dereferenceable_or_null(8) ptr @strdup(ptr nonnull @.str) +; CHECK-NEXT: store ptr [[STRDUP]], ptr [[ESC:%.*]], align 8 ; CHECK-NEXT: ret i32 8 ; - %call = tail call i8* @strndup(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i32 57) nounwind - store i8* %call, i8** %esc, align 8 - %1 = tail call i32 @llvm.objectsize.i32.p0i8(i8* %call, i1 true, i1 false, i1 false) + %call = tail call ptr @strndup(ptr @.str, i32 57) nounwind + store ptr %call, ptr %esc, align 8 + %1 = tail call i32 @llvm.objectsize.i32.p0(ptr %call, i1 true, i1 false, i1 false) ret i32 %1 } -@globalalias = internal alias [60 x i8], [60 x i8]* @a +@globalalias = internal alias [60 x i8], ptr @a define i32 @test18() { ; CHECK-LABEL: @test18( ; CHECK-NEXT: ret i32 60 ; - %bc = bitcast [60 x i8]* @globalalias to i8* - %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false, i1 false, i1 false) + %1 = call i32 @llvm.objectsize.i32.p0(ptr @globalalias, i1 false, i1 false, i1 false) ret i32 %1 } -@globalalias2 = weak alias [60 x i8], [60 x i8]* @a +@globalalias2 = weak alias [60 x i8], ptr @a define i32 @test19() { ; CHECK-LABEL: @test19( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.objectsize.i32.p0i8(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @globalalias2, i32 0, i32 0), i1 false, i1 false, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr @globalalias2, i1 false, i1 false, i1 false) ; CHECK-NEXT: ret i32 [[TMP1]] ; - %bc = bitcast [60 x i8]* @globalalias2 to i8* - %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %bc, i1 false, i1 false, i1 false) + %1 = call i32 @llvm.objectsize.i32.p0(ptr @globalalias2, i1 false, i1 false, i1 false) ret i32 %1 } @@ -312,7 +305,7 @@ define i32 @test20() { ; CHECK-LABEL: @test20( ; CHECK-NEXT: ret i32 0 ; - %1 = call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 false, i1 false, i1 false) + %1 = call i32 @llvm.objectsize.i32.p0(ptr null, i1 false, i1 false, i1 false) ret i32 %1 } @@ -320,65 +313,65 @@ define i32 @test21() { ; CHECK-LABEL: @test21( ; CHECK-NEXT: ret i32 0 ; - %1 = call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 true, i1 false, i1 false) + %1 = call i32 @llvm.objectsize.i32.p0(ptr null, i1 true, i1 false, i1 false) ret i32 %1 } define i32 @test22() { ; CHECK-LABEL: @test22( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 false, i1 true, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr null, i1 false, i1 true, i1 false) ; CHECK-NEXT: ret i32 [[TMP1]] ; - %1 = call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 false, i1 true, i1 false) + %1 = call i32 @llvm.objectsize.i32.p0(ptr null, i1 false, i1 true, i1 false) ret i32 %1 } define i32 @test23() { ; CHECK-LABEL: @test23( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 true, i1 true, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.objectsize.i32.p0(ptr null, i1 true, i1 true, i1 false) ; CHECK-NEXT: ret i32 [[TMP1]] ; - %1 = call i32 @llvm.objectsize.i32.p0i8(i8* null, i1 true, i1 true, i1 false) + %1 = call i32 @llvm.objectsize.i32.p0(ptr null, i1 true, i1 true, i1 false) ret i32 %1 } ; 1 is an arbitrary non-zero address space. 
define i32 @test24() { ; CHECK-LABEL: @test24( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 false, i1 false, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.objectsize.i32.p1(ptr addrspace(1) null, i1 false, i1 false, i1 false) ; CHECK-NEXT: ret i32 [[TMP1]] ; - %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 false, + %1 = call i32 @llvm.objectsize.i32.p1(ptr addrspace(1) null, i1 false, i1 false, i1 false) ret i32 %1 } define i32 @test25() { ; CHECK-LABEL: @test25( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 true, i1 false, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.objectsize.i32.p1(ptr addrspace(1) null, i1 true, i1 false, i1 false) ; CHECK-NEXT: ret i32 [[TMP1]] ; - %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 true, + %1 = call i32 @llvm.objectsize.i32.p1(ptr addrspace(1) null, i1 true, i1 false, i1 false) ret i32 %1 } define i32 @test26() { ; CHECK-LABEL: @test26( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 false, i1 true, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.objectsize.i32.p1(ptr addrspace(1) null, i1 false, i1 true, i1 false) ; CHECK-NEXT: ret i32 [[TMP1]] ; - %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 false, + %1 = call i32 @llvm.objectsize.i32.p1(ptr addrspace(1) null, i1 false, i1 true, i1 false) ret i32 %1 } define i32 @test27() { ; CHECK-LABEL: @test27( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 true, i1 true, i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.objectsize.i32.p1(ptr addrspace(1) null, i1 true, i1 true, i1 false) ; CHECK-NEXT: ret i32 [[TMP1]] ; - %1 = call i32 @llvm.objectsize.i32.p1i8(i8 addrspace(1)* null, i1 true, + %1 = call i32 @llvm.objectsize.i32.p1(ptr addrspace(1) null, i1 true, i1 true, i1 false) ret i32 %1 } diff --git a/llvm/test/Transforms/InstCombine/ptrauth-intrinsics.ll b/llvm/test/Transforms/InstCombine/ptrauth-intrinsics.ll index ded5123acd63e..d693fd55f54f0 100644 --- a/llvm/test/Transforms/InstCombine/ptrauth-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/ptrauth-intrinsics.ll @@ -1,87 +1,87 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s -define i64 @test_ptrauth_nop(i8* %p) { +define i64 @test_ptrauth_nop(ptr %p) { ; CHECK-LABEL: @test_ptrauth_nop( -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i8* [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[P:%.*]] to i64 ; CHECK-NEXT: ret i64 [[TMP0]] ; - %tmp0 = ptrtoint i8* %p to i64 + %tmp0 = ptrtoint ptr %p to i64 %signed = call i64 @llvm.ptrauth.sign(i64 %tmp0, i32 1, i64 1234) %authed = call i64 @llvm.ptrauth.auth(i64 %signed, i32 1, i64 1234) ret i64 %authed } -define i64 @test_ptrauth_nop_mismatch(i8* %p) { +define i64 @test_ptrauth_nop_mismatch(ptr %p) { ; CHECK-LABEL: @test_ptrauth_nop_mismatch( -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i8* [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[P:%.*]] to i64 ; CHECK-NEXT: [[SIGNED:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[TMP0]], i32 1, i64 1234) ; CHECK-NEXT: [[AUTHED:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[SIGNED]], i32 1, i64 10) ; CHECK-NEXT: ret i64 [[AUTHED]] ; - %tmp0 = ptrtoint i8* %p to i64 + %tmp0 = ptrtoint ptr %p to i64 %signed = call i64 @llvm.ptrauth.sign(i64 %tmp0, i32 1, i64 1234) %authed = call i64 @llvm.ptrauth.auth(i64 %signed, i32 1, i64 10) 
ret i64 %authed } -define i64 @test_ptrauth_nop_mismatch_keys(i8* %p) { +define i64 @test_ptrauth_nop_mismatch_keys(ptr %p) { ; CHECK-LABEL: @test_ptrauth_nop_mismatch_keys( -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i8* [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[P:%.*]] to i64 ; CHECK-NEXT: [[SIGNED:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[TMP0]], i32 0, i64 1234) ; CHECK-NEXT: [[AUTHED:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[SIGNED]], i32 1, i64 1234) ; CHECK-NEXT: ret i64 [[AUTHED]] ; - %tmp0 = ptrtoint i8* %p to i64 + %tmp0 = ptrtoint ptr %p to i64 %signed = call i64 @llvm.ptrauth.sign(i64 %tmp0, i32 0, i64 1234) %authed = call i64 @llvm.ptrauth.auth(i64 %signed, i32 1, i64 1234) ret i64 %authed } -define i64 @test_ptrauth_sign_resign(i8* %p) { +define i64 @test_ptrauth_sign_resign(ptr %p) { ; CHECK-LABEL: @test_ptrauth_sign_resign( -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i8* [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[P:%.*]] to i64 ; CHECK-NEXT: [[AUTHED:%.*]] = call i64 @llvm.ptrauth.sign(i64 [[TMP0]], i32 0, i64 42) ; CHECK-NEXT: ret i64 [[AUTHED]] ; - %tmp0 = ptrtoint i8* %p to i64 + %tmp0 = ptrtoint ptr %p to i64 %signed = call i64 @llvm.ptrauth.sign(i64 %tmp0, i32 1, i64 1234) %authed = call i64 @llvm.ptrauth.resign(i64 %signed, i32 1, i64 1234, i32 0, i64 42) ret i64 %authed } -define i64 @test_ptrauth_resign_resign(i8* %p) { +define i64 @test_ptrauth_resign_resign(ptr %p) { ; CHECK-LABEL: @test_ptrauth_resign_resign( -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i8* [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[P:%.*]] to i64 ; CHECK-NEXT: [[AUTHED:%.*]] = call i64 @llvm.ptrauth.resign(i64 [[TMP0]], i32 1, i64 1234, i32 1, i64 3141) ; CHECK-NEXT: ret i64 [[AUTHED]] ; - %tmp0 = ptrtoint i8* %p to i64 + %tmp0 = ptrtoint ptr %p to i64 %signed = call i64 @llvm.ptrauth.resign(i64 %tmp0, i32 1, i64 1234, i32 0, i64 42) %authed = call i64 @llvm.ptrauth.resign(i64 %signed, i32 0, i64 42, i32 1, i64 3141) ret i64 %authed } -define i64 @test_ptrauth_resign_auth(i8* %p) { +define i64 @test_ptrauth_resign_auth(ptr %p) { ; CHECK-LABEL: @test_ptrauth_resign_auth( -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i8* [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[P:%.*]] to i64 ; CHECK-NEXT: [[AUTHED:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[TMP0]], i32 1, i64 1234) ; CHECK-NEXT: ret i64 [[AUTHED]] ; - %tmp0 = ptrtoint i8* %p to i64 + %tmp0 = ptrtoint ptr %p to i64 %signed = call i64 @llvm.ptrauth.resign(i64 %tmp0, i32 1, i64 1234, i32 0, i64 42) %authed = call i64 @llvm.ptrauth.auth(i64 %signed, i32 0, i64 42) ret i64 %authed } -define i64 @test_ptrauth_resign_auth_mismatch(i8* %p) { +define i64 @test_ptrauth_resign_auth_mismatch(ptr %p) { ; CHECK-LABEL: @test_ptrauth_resign_auth_mismatch( -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint i8* [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[P:%.*]] to i64 ; CHECK-NEXT: [[SIGNED:%.*]] = call i64 @llvm.ptrauth.resign(i64 [[TMP0]], i32 1, i64 1234, i32 0, i64 10) ; CHECK-NEXT: [[AUTHED:%.*]] = call i64 @llvm.ptrauth.auth(i64 [[SIGNED]], i32 0, i64 42) ; CHECK-NEXT: ret i64 [[AUTHED]] ; - %tmp0 = ptrtoint i8* %p to i64 + %tmp0 = ptrtoint ptr %p to i64 %signed = call i64 @llvm.ptrauth.resign(i64 %tmp0, i32 1, i64 1234, i32 0, i64 10) %authed = call i64 @llvm.ptrauth.auth(i64 %signed, i32 0, i64 42) ret i64 %authed } diff --git a/llvm/test/Transforms/InstCombine/ptrtoint-nullgep.ll b/llvm/test/Transforms/InstCombine/ptrtoint-nullgep.ll index 3079acd5f5146..730dc782096df 100644 ---
a/llvm/test/Transforms/InstCombine/ptrtoint-nullgep.ll +++ b/llvm/test/Transforms/InstCombine/ptrtoint-nullgep.ll @@ -13,32 +13,32 @@ target datalayout = "p:64:64:64:64" declare void @use_i64(i64) -declare void @use_ptr(i8 addrspace(1)*) +declare void @use_ptr(ptr addrspace(1)) define i64 @constant_fold_ptrtoint_gep_zero() { ; ALL-LABEL: define {{[^@]+}}@constant_fold_ptrtoint_gep_zero() { ; ALL-NEXT: ret i64 0 ; - ret i64 ptrtoint (i32 addrspace(1)* getelementptr (i32, i32 addrspace(1)* null, i64 0) to i64) + ret i64 ptrtoint (ptr addrspace(1) null to i64) } define i64 @constant_fold_ptrtoint_gep_nonzero() { ; LLPARSER-LABEL: define {{[^@]+}}@constant_fold_ptrtoint_gep_nonzero() { -; LLPARSER-NEXT: ret i64 ptrtoint (i32 addrspace(1)* getelementptr (i32, i32 addrspace(1)* null, i64 1234) to i64) +; LLPARSER-NEXT: ret i64 ptrtoint (ptr addrspace(1) getelementptr (i32, ptr addrspace(1) null, i64 1234) to i64) ; ; INSTSIMPLIFY-LABEL: define {{[^@]+}}@constant_fold_ptrtoint_gep_nonzero() { -; INSTSIMPLIFY-NEXT: ret i64 ptrtoint (i32 addrspace(1)* getelementptr (i32, i32 addrspace(1)* null, i64 1234) to i64) +; INSTSIMPLIFY-NEXT: ret i64 ptrtoint (ptr addrspace(1) getelementptr (i32, ptr addrspace(1) null, i64 1234) to i64) ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@constant_fold_ptrtoint_gep_nonzero() { ; INSTCOMBINE-NEXT: ret i64 4936 ; - ret i64 ptrtoint (i32 addrspace(1)* getelementptr (i32, i32 addrspace(1)* null, i64 1234) to i64) + ret i64 ptrtoint (ptr addrspace(1) getelementptr (i32, ptr addrspace(1) null, i64 1234) to i64) } define i64 @constant_fold_ptrtoint_gep_zero_inbounds() { ; ALL-LABEL: define {{[^@]+}}@constant_fold_ptrtoint_gep_zero_inbounds() { ; ALL-NEXT: ret i64 0 ; - ret i64 ptrtoint (i32 addrspace(1)* getelementptr inbounds (i32, i32 addrspace(1)* null, i64 0) to i64) + ret i64 ptrtoint (ptr addrspace(1) null to i64) } ; In theory we could fold this to poison/null, but that would break offsetof @@ -46,28 +46,28 @@ define i64 @constant_fold_ptrtoint_gep_zero_inbounds() { ; TODO: should Clang special case ((INTEGER)&((TYPE *)0)->MEMBER) to emit a non-inbounds GEP? 
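; For context, the offsetof idiom referenced above, sketched as a hypothetical
; C macro (not part of this patch):
;   #define offsetof(T, M) ((size_t)&(((T *)0)->M))
; Clang lowers &(((T *)0)->M) to an inbounds GEP on a null base, so folding
; inbounds null GEPs to poison would break this pattern; the tests below
; instead expect such a GEP to fold to its byte offset, e.g.
; 1234 * sizeof(i32) = 4936.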
define i64 @constant_fold_ptrtoint_gep_nonzero_inbounds() { ; LLPARSER-LABEL: define {{[^@]+}}@constant_fold_ptrtoint_gep_nonzero_inbounds() { -; LLPARSER-NEXT: ret i64 ptrtoint (i32 addrspace(1)* getelementptr inbounds (i32, i32 addrspace(1)* null, i64 1234) to i64) +; LLPARSER-NEXT: ret i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i32, ptr addrspace(1) null, i64 1234) to i64) ; ; INSTSIMPLIFY-LABEL: define {{[^@]+}}@constant_fold_ptrtoint_gep_nonzero_inbounds() { -; INSTSIMPLIFY-NEXT: ret i64 ptrtoint (i32 addrspace(1)* getelementptr inbounds (i32, i32 addrspace(1)* null, i64 1234) to i64) +; INSTSIMPLIFY-NEXT: ret i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i32, ptr addrspace(1) null, i64 1234) to i64) ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@constant_fold_ptrtoint_gep_nonzero_inbounds() { ; INSTCOMBINE-NEXT: ret i64 4936 ; - ret i64 ptrtoint (i32 addrspace(1)* getelementptr inbounds (i32, i32 addrspace(1)* null, i64 1234) to i64) + ret i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i32, ptr addrspace(1) null, i64 1234) to i64) } ; Check all combinations of inbounds+non-inbounds GEP with the outer GEP having a non-zero offset define void @constant_fold_ptrtoint_of_gep_of_nullgep() { ; LLPARSER-LABEL: define {{[^@]+}}@constant_fold_ptrtoint_of_gep_of_nullgep() { -; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* null, i64 1234) to i64)) -; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* null, i64 1234) to i64)) -; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* null, i64 1234) to i64)) -; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* null, i64 1234) to i64)) -; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* null, i64 1234) to i64)) -; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* null, i64 1234) to i64)) -; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* null, i64 1234) to i64)) -; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* null, i64 1234) to i64)) +; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 1234) to i64)) +; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i64 1234) to i64)) +; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 1234) to i64)) +; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i64 1234) to i64)) +; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 1234) to i64)) +; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 1234) to i64)) +; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i64 1234) to i64)) +; LLPARSER-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i64 1234) to i64)) ; LLPARSER-NEXT: call void @use_i64(i64 0) ; LLPARSER-NEXT: call void @use_i64(i64 0) ; LLPARSER-NEXT: 
call void @use_i64(i64 0) @@ -75,14 +75,14 @@ define void @constant_fold_ptrtoint_of_gep_of_nullgep() { ; LLPARSER-NEXT: ret void ; ; INSTSIMPLIFY-LABEL: define {{[^@]+}}@constant_fold_ptrtoint_of_gep_of_nullgep() { -; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* null, i64 1234) to i64)) -; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* null, i64 1234) to i64)) -; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* null, i64 1234) to i64)) -; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* null, i64 1234) to i64)) -; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* null, i64 1234) to i64)) -; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* null, i64 1234) to i64)) -; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* null, i64 1234) to i64)) -; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* null, i64 1234) to i64)) +; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 1234) to i64)) +; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i64 1234) to i64)) +; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 1234) to i64)) +; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i64 1234) to i64)) +; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 1234) to i64)) +; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 1234) to i64)) +; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i64 1234) to i64)) +; INSTSIMPLIFY-NEXT: call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i64 1234) to i64)) ; INSTSIMPLIFY-NEXT: call void @use_i64(i64 0) ; INSTSIMPLIFY-NEXT: call void @use_i64(i64 0) ; INSTSIMPLIFY-NEXT: call void @use_i64(i64 0) @@ -104,20 +104,20 @@ define void @constant_fold_ptrtoint_of_gep_of_nullgep() { ; INSTCOMBINE-NEXT: call void @use_i64(i64 0) ; INSTCOMBINE-NEXT: ret void ; - call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* null, i64 0), i64 1234) to i64)) - call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* null, i64 0), i64 1234) to i64)) - call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* null, i64 0), i64 1234) to i64)) - call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* null, i64 0), i64 1234) to i64)) + call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 1234) to i64)) + call void @use_i64(i64 ptrtoint (ptr addrspace(1) 
getelementptr (i8, ptr addrspace(1) null, i64 1234) to i64)) + call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 1234) to i64)) + call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i64 1234) to i64)) ; Same again but this time with the inner GEP using the non-zero offset - call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* null, i64 1234), i64 0) to i64)) - call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* null, i64 1234), i64 0) to i64)) - call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* null, i64 1234), i64 0) to i64)) - call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* null, i64 1234), i64 0) to i64)) + call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 1234), i64 0) to i64)) + call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 1234), i64 0) to i64)) + call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i64 1234), i64 0) to i64)) + call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i64 1234), i64 0) to i64)) ; And finally with two constants that sum to zero - call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* null, i64 -1), i64 1) to i64)) - call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* null, i64 -1), i64 1) to i64)) - call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr inbounds (i8, i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* null, i64 -1), i64 1) to i64)) - call void @use_i64(i64 ptrtoint (i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* getelementptr (i8, i8 addrspace(1)* null, i64 -1), i64 1) to i64)) + call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 -1), i64 1) to i64)) + call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) null, i64 -1), i64 1) to i64)) + call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i64 -1), i64 1) to i64)) + call void @use_i64(i64 ptrtoint (ptr addrspace(1) getelementptr (i8, ptr addrspace(1) getelementptr (i8, ptr addrspace(1) null, i64 -1), i64 1) to i64)) ret void } @@ -125,48 +125,48 @@ define void @constant_fold_ptrtoint_of_gep_of_nullgep() { define i64 @fold_ptrtoint_nullgep_zero() { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_zero() { ; LLPARSER-NEXT: [[OFFSET:%.*]] = add i64 0, 0 -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[OFFSET]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i8, ptr addrspace(1) null, i64 [[OFFSET]] 
+; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; CHECK-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_zero() { ; CHECK-NEXT: ret i64 0 ; %offset = add i64 0, 0 - %ptr = getelementptr i8, i8 addrspace(1)* null, i64 %offset - %ret = ptrtoint i8 addrspace(1)* %ptr to i64 + %ptr = getelementptr i8, ptr addrspace(1) null, i64 %offset + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } define i64 @fold_ptrtoint_nullgep_zero_inbounds() { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_zero_inbounds() { ; LLPARSER-NEXT: [[OFFSET:%.*]] = add i64 0, 0 -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* null, i64 [[OFFSET]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) null, i64 [[OFFSET]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; CHECK-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_zero_inbounds() { ; CHECK-NEXT: ret i64 0 ; %offset = add i64 0, 0 - %ptr = getelementptr inbounds i8, i8 addrspace(1)* null, i64 %offset - %ret = ptrtoint i8 addrspace(1)* %ptr to i64 + %ptr = getelementptr inbounds i8, ptr addrspace(1) null, i64 %offset + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } define i64 @fold_ptrtoint_nullgep_nonzero() { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_nonzero() { ; LLPARSER-NEXT: [[OFFSET:%.*]] = add i64 1234, 0 -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[OFFSET]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i8, ptr addrspace(1) null, i64 [[OFFSET]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; CHECK-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_nonzero() { ; CHECK-NEXT: ret i64 1234 ; %offset = add i64 1234, 0 - %ptr = getelementptr i8, i8 addrspace(1)* null, i64 %offset - %ret = ptrtoint i8 addrspace(1)* %ptr to i64 + %ptr = getelementptr i8, ptr addrspace(1) null, i64 %offset + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } @@ -175,16 +175,16 @@ define i64 @fold_ptrtoint_nullgep_nonzero() { define i64 @fold_ptrtoint_nullgep_nonzero_inbounds() { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_nonzero_inbounds() { ; LLPARSER-NEXT: [[OFFSET:%.*]] = add i64 1234, 0 -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* null, i64 [[OFFSET]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) null, i64 [[OFFSET]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; CHECK-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_nonzero_inbounds() { ; CHECK-NEXT: ret i64 1234 ; %offset = add i64 1234, 0 - %ptr = getelementptr inbounds i8, i8 addrspace(1)* null, i64 %offset - %ret = ptrtoint i8 addrspace(1)* %ptr to i64 + %ptr = getelementptr inbounds i8, ptr addrspace(1) null, i64 %offset + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } @@ -192,22 +192,22 @@ define i64 @fold_ptrtoint_nullgep_nonzero_inbounds() { define i64 @fold_ptrtoint_nullgep_variable(i64 %val) { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable ; LLPARSER-SAME: (i64 [[VAL:%.*]]) { -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i8, i8 
addrspace(1)* null, i64 [[VAL]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i8, ptr addrspace(1) null, i64 [[VAL]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; INSTSIMPLIFY-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable ; INSTSIMPLIFY-SAME: (i64 [[VAL:%.*]]) { -; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[VAL]] -; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr i8, ptr addrspace(1) null, i64 [[VAL]] +; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; INSTSIMPLIFY-NEXT: ret i64 [[RET]] ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable ; INSTCOMBINE-SAME: (i64 [[VAL:%.*]]) { ; INSTCOMBINE-NEXT: ret i64 [[VAL]] ; - %ptr = getelementptr i8, i8 addrspace(1)* null, i64 %val - %ret = ptrtoint i8 addrspace(1)* %ptr to i64 + %ptr = getelementptr i8, ptr addrspace(1) null, i64 %val + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } @@ -216,15 +216,15 @@ define i64 @fold_ptrtoint_nullgep_variable_known_nonzero(i64 %val) { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero ; LLPARSER-SAME: (i64 [[VAL:%.*]]) { ; LLPARSER-NEXT: [[NON_ZERO_OFFSET:%.*]] = or i64 [[VAL]], 1 -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[NON_ZERO_OFFSET]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i8, ptr addrspace(1) null, i64 [[NON_ZERO_OFFSET]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; INSTSIMPLIFY-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero ; INSTSIMPLIFY-SAME: (i64 [[VAL:%.*]]) { ; INSTSIMPLIFY-NEXT: [[NON_ZERO_OFFSET:%.*]] = or i64 [[VAL]], 1 -; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[NON_ZERO_OFFSET]] -; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr i8, ptr addrspace(1) null, i64 [[NON_ZERO_OFFSET]] +; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; INSTSIMPLIFY-NEXT: ret i64 [[RET]] ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero @@ -233,8 +233,8 @@ define i64 @fold_ptrtoint_nullgep_variable_known_nonzero(i64 %val) { ; INSTCOMBINE-NEXT: ret i64 [[NON_ZERO_OFFSET]] ; %non_zero_offset = or i64 %val, 1 - %ptr = getelementptr i8, i8 addrspace(1)* null, i64 %non_zero_offset - %ret = ptrtoint i8 addrspace(1)* %ptr to i64 + %ptr = getelementptr i8, ptr addrspace(1) null, i64 %non_zero_offset + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } @@ -242,22 +242,22 @@ define i64 @fold_ptrtoint_nullgep_variable_known_nonzero(i64 %val) { define i64 @fold_ptrtoint_nullgep_variable_inbounds(i64 %val) { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_inbounds ; LLPARSER-SAME: (i64 [[VAL:%.*]]) { -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* null, i64 [[VAL]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) null, i64 [[VAL]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; INSTSIMPLIFY-LABEL: define 
{{[^@]+}}@fold_ptrtoint_nullgep_variable_inbounds ; INSTSIMPLIFY-SAME: (i64 [[VAL:%.*]]) { -; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* null, i64 [[VAL]] -; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) null, i64 [[VAL]] +; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; INSTSIMPLIFY-NEXT: ret i64 [[RET]] ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_inbounds ; INSTCOMBINE-SAME: (i64 [[VAL:%.*]]) { ; INSTCOMBINE-NEXT: ret i64 [[VAL]] ; - %ptr = getelementptr inbounds i8, i8 addrspace(1)* null, i64 %val - %ret = ptrtoint i8 addrspace(1)* %ptr to i64 + %ptr = getelementptr inbounds i8, ptr addrspace(1) null, i64 %val + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } @@ -266,15 +266,15 @@ define i64 @fold_ptrtoint_nullgep_variable_known_nonzero_inbounds(i64 %val) { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero_inbounds ; LLPARSER-SAME: (i64 [[VAL:%.*]]) { ; LLPARSER-NEXT: [[NON_ZERO_OFFSET:%.*]] = or i64 [[VAL]], 1 -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* null, i64 [[NON_ZERO_OFFSET]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) null, i64 [[NON_ZERO_OFFSET]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; INSTSIMPLIFY-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero_inbounds ; INSTSIMPLIFY-SAME: (i64 [[VAL:%.*]]) { ; INSTSIMPLIFY-NEXT: [[NON_ZERO_OFFSET:%.*]] = or i64 [[VAL]], 1 -; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* null, i64 [[NON_ZERO_OFFSET]] -; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) null, i64 [[NON_ZERO_OFFSET]] +; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; INSTSIMPLIFY-NEXT: ret i64 [[RET]] ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero_inbounds @@ -283,8 +283,8 @@ define i64 @fold_ptrtoint_nullgep_variable_known_nonzero_inbounds(i64 %val) { ; INSTCOMBINE-NEXT: ret i64 [[NON_ZERO_OFFSET]] ; %non_zero_offset = or i64 %val, 1 - %ptr = getelementptr inbounds i8, i8 addrspace(1)* null, i64 %non_zero_offset - %ret = ptrtoint i8 addrspace(1)* %ptr to i64 + %ptr = getelementptr inbounds i8, ptr addrspace(1) null, i64 %non_zero_offset + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } @@ -293,15 +293,15 @@ define i64 @fold_ptrtoint_nullgep_variable_known_nonzero_inbounds_multiple_indic ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero_inbounds_multiple_indices ; LLPARSER-SAME: (i64 [[VAL:%.*]]) { ; LLPARSER-NEXT: [[NON_ZERO_OFFSET:%.*]] = or i64 [[VAL]], 1 -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds [2 x i8], [2 x i8] addrspace(1)* null, i64 [[NON_ZERO_OFFSET]], i32 1 -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds [2 x i8], ptr addrspace(1) null, i64 [[NON_ZERO_OFFSET]], i32 1 +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; INSTSIMPLIFY-LABEL: define 
{{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero_inbounds_multiple_indices ; INSTSIMPLIFY-SAME: (i64 [[VAL:%.*]]) { ; INSTSIMPLIFY-NEXT: [[NON_ZERO_OFFSET:%.*]] = or i64 [[VAL]], 1 -; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr inbounds [2 x i8], [2 x i8] addrspace(1)* null, i64 [[NON_ZERO_OFFSET]], i32 1 -; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr inbounds [2 x i8], ptr addrspace(1) null, i64 [[NON_ZERO_OFFSET]], i32 1 +; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; INSTSIMPLIFY-NEXT: ret i64 [[RET]] ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_known_nonzero_inbounds_multiple_indices @@ -311,8 +311,8 @@ define i64 @fold_ptrtoint_nullgep_variable_known_nonzero_inbounds_multiple_indic ; INSTCOMBINE-NEXT: ret i64 [[PTR_OFFS]] ; %non_zero_offset = or i64 %val, 1 - %ptr = getelementptr inbounds [2 x i8], [2 x i8] addrspace(1)* null, i64 %non_zero_offset, i32 1 - %ret = ptrtoint i8 addrspace(1)* %ptr to i64 + %ptr = getelementptr inbounds [2 x i8], ptr addrspace(1) null, i64 %non_zero_offset, i32 1 + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } @@ -321,14 +321,14 @@ define i64 @fold_ptrtoint_nullgep_variable_known_nonzero_inbounds_multiple_indic define i64 @fold_ptrtoint_nullgep_i32_variable(i64 %val) { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_i32_variable ; LLPARSER-SAME: (i64 [[VAL:%.*]]) { -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i32, i32 addrspace(1)* null, i64 [[VAL]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i32 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i32, ptr addrspace(1) null, i64 [[VAL]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; INSTSIMPLIFY-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_i32_variable ; INSTSIMPLIFY-SAME: (i64 [[VAL:%.*]]) { -; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr i32, i32 addrspace(1)* null, i64 [[VAL]] -; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint i32 addrspace(1)* [[PTR]] to i64 +; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr i32, ptr addrspace(1) null, i64 [[VAL]] +; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; INSTSIMPLIFY-NEXT: ret i64 [[RET]] ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_i32_variable @@ -336,8 +336,8 @@ define i64 @fold_ptrtoint_nullgep_i32_variable(i64 %val) { ; INSTCOMBINE-NEXT: [[PTR_IDX:%.*]] = shl i64 [[VAL]], 2 ; INSTCOMBINE-NEXT: ret i64 [[PTR_IDX]] ; - %ptr = getelementptr i32, i32 addrspace(1)* null, i64 %val - %ret = ptrtoint i32 addrspace(1)* %ptr to i64 + %ptr = getelementptr i32, ptr addrspace(1) null, i64 %val + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } @@ -345,14 +345,14 @@ define i64 @fold_ptrtoint_nullgep_i32_variable(i64 %val) { define i32 @fold_ptrtoint_nullgep_variable_trunc(i64 %val) { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_trunc ; LLPARSER-SAME: (i64 [[VAL:%.*]]) { -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[VAL]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i32 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i8, ptr addrspace(1) null, i64 [[VAL]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32 ; LLPARSER-NEXT: ret i32 [[RET]] ; ; INSTSIMPLIFY-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_trunc ; INSTSIMPLIFY-SAME: (i64 [[VAL:%.*]]) { -; 
INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[VAL]] -; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i32 +; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr i8, ptr addrspace(1) null, i64 [[VAL]] +; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i32 ; INSTSIMPLIFY-NEXT: ret i32 [[RET]] ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_variable_trunc @@ -360,8 +360,8 @@ define i32 @fold_ptrtoint_nullgep_variable_trunc(i64 %val) { ; INSTCOMBINE-NEXT: [[RET:%.*]] = trunc i64 [[VAL]] to i32 ; INSTCOMBINE-NEXT: ret i32 [[RET]] ; - %ptr = getelementptr i8, i8 addrspace(1)* null, i64 %val - %ret = ptrtoint i8 addrspace(1)* %ptr to i32 + %ptr = getelementptr i8, ptr addrspace(1) null, i64 %val + %ret = ptrtoint ptr addrspace(1) %ptr to i32 ret i32 %ret } @@ -371,9 +371,9 @@ define i64 @fold_ptrtoint_zero_nullgep_of_nonzero_inbounds_nullgep() { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_zero_nullgep_of_nonzero_inbounds_nullgep() { ; LLPARSER-NEXT: [[NONZERO_OFFSET:%.*]] = add i64 1234, 0 ; LLPARSER-NEXT: [[ZERO_OFFSET:%.*]] = sub i64 [[NONZERO_OFFSET]], 1234 -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* null, i64 [[NONZERO_OFFSET]] -; LLPARSER-NEXT: [[PTR2:%.*]] = getelementptr i8, i8 addrspace(1)* [[PTR]], i64 [[ZERO_OFFSET]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR2]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) null, i64 [[NONZERO_OFFSET]] +; LLPARSER-NEXT: [[PTR2:%.*]] = getelementptr i8, ptr addrspace(1) [[PTR]], i64 [[ZERO_OFFSET]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR2]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; CHECK-LABEL: define {{[^@]+}}@fold_ptrtoint_zero_nullgep_of_nonzero_inbounds_nullgep() { @@ -381,9 +381,9 @@ define i64 @fold_ptrtoint_zero_nullgep_of_nonzero_inbounds_nullgep() { ; %nonzero_offset = add i64 1234, 0 %zero_offset = sub i64 %nonzero_offset, 1234 - %ptr = getelementptr inbounds i8, i8 addrspace(1)* null, i64 %nonzero_offset - %ptr2 = getelementptr i8, i8 addrspace(1)* %ptr, i64 %zero_offset - %ret = ptrtoint i8 addrspace(1)* %ptr2 to i64 + %ptr = getelementptr inbounds i8, ptr addrspace(1) null, i64 %nonzero_offset + %ptr2 = getelementptr i8, ptr addrspace(1) %ptr, i64 %zero_offset + %ret = ptrtoint ptr addrspace(1) %ptr2 to i64 ret i64 %ret } @@ -391,9 +391,9 @@ define i64 @fold_ptrtoint_nonzero_inbounds_nullgep_of_zero_noninbounds_nullgep() ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nonzero_inbounds_nullgep_of_zero_noninbounds_nullgep() { ; LLPARSER-NEXT: [[NONZERO_OFFSET:%.*]] = add i64 1234, 0 ; LLPARSER-NEXT: [[ZERO_OFFSET:%.*]] = sub i64 [[NONZERO_OFFSET]], 1234 -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i8, i8 addrspace(1)* null, i64 [[ZERO_OFFSET]] -; LLPARSER-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[PTR]], i64 [[NONZERO_OFFSET]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR2]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr i8, ptr addrspace(1) null, i64 [[ZERO_OFFSET]] +; LLPARSER-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PTR]], i64 [[NONZERO_OFFSET]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR2]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; CHECK-LABEL: define {{[^@]+}}@fold_ptrtoint_nonzero_inbounds_nullgep_of_zero_noninbounds_nullgep() { @@ -401,9 +401,9 @@ define i64 @fold_ptrtoint_nonzero_inbounds_nullgep_of_zero_noninbounds_nullgep() ; 
%nonzero_offset = add i64 1234, 0 %zero_offset = sub i64 %nonzero_offset, 1234 - %ptr = getelementptr i8, i8 addrspace(1)* null, i64 %zero_offset - %ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %ptr, i64 %nonzero_offset - %ret = ptrtoint i8 addrspace(1)* %ptr2 to i64 + %ptr = getelementptr i8, ptr addrspace(1) null, i64 %zero_offset + %ptr2 = getelementptr inbounds i8, ptr addrspace(1) %ptr, i64 %nonzero_offset + %ret = ptrtoint ptr addrspace(1) %ptr2 to i64 ret i64 %ret } @@ -415,15 +415,15 @@ define i64 @fold_complex_index_last_nonzero(i64 %x) local_unnamed_addr #0 { ; LLPARSER-LABEL: define {{[^@]+}}@fold_complex_index_last_nonzero ; LLPARSER-SAME: (i64 [[X:%.*]]) local_unnamed_addr { ; LLPARSER-NEXT: entry: -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], [[STRUCT_S]] addrspace(1)* null, i64 0, i32 0, i64 0, i32 0, i64 [[X]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr addrspace(1) null, i64 0, i32 0, i64 0, i32 0, i64 [[X]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; INSTSIMPLIFY-LABEL: define {{[^@]+}}@fold_complex_index_last_nonzero ; INSTSIMPLIFY-SAME: (i64 [[X:%.*]]) local_unnamed_addr { ; INSTSIMPLIFY-NEXT: entry: -; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], [[STRUCT_S]] addrspace(1)* null, i64 0, i32 0, i64 0, i32 0, i64 [[X]] -; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr addrspace(1) null, i64 0, i32 0, i64 0, i32 0, i64 [[X]] +; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; INSTSIMPLIFY-NEXT: ret i64 [[RET]] ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@fold_complex_index_last_nonzero @@ -432,8 +432,8 @@ define i64 @fold_complex_index_last_nonzero(i64 %x) local_unnamed_addr #0 { ; INSTCOMBINE-NEXT: ret i64 [[X]] ; entry: - %ptr = getelementptr inbounds %struct.S, %struct.S addrspace(1)* null, i64 0, i32 0, i64 0, i32 0, i64 %x - %ret = ptrtoint i8 addrspace(1)* %ptr to i64 + %ptr = getelementptr inbounds %struct.S, ptr addrspace(1) null, i64 0, i32 0, i64 0, i32 0, i64 %x + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } @@ -441,15 +441,15 @@ define i64 @fold_complex_index_multiple_nonzero(i64 %x) local_unnamed_addr #0 { ; LLPARSER-LABEL: define {{[^@]+}}@fold_complex_index_multiple_nonzero ; LLPARSER-SAME: (i64 [[X:%.*]]) local_unnamed_addr { ; LLPARSER-NEXT: entry: -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], [[STRUCT_S]] addrspace(1)* null, i64 1, i32 0, i64 1, i32 0, i64 [[X]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr addrspace(1) null, i64 1, i32 0, i64 1, i32 0, i64 [[X]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; INSTSIMPLIFY-LABEL: define {{[^@]+}}@fold_complex_index_multiple_nonzero ; INSTSIMPLIFY-SAME: (i64 [[X:%.*]]) local_unnamed_addr { ; INSTSIMPLIFY-NEXT: entry: -; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], [[STRUCT_S]] addrspace(1)* null, i64 1, i32 0, i64 1, i32 0, i64 [[X]] -; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR]] to i64 +; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr addrspace(1) null, i64 1, 
i32 0, i64 1, i32 0, i64 [[X]] +; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; INSTSIMPLIFY-NEXT: ret i64 [[RET]] ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@fold_complex_index_multiple_nonzero @@ -459,8 +459,8 @@ define i64 @fold_complex_index_multiple_nonzero(i64 %x) local_unnamed_addr #0 { ; INSTCOMBINE-NEXT: ret i64 [[PTR_OFFS]] ; entry: - %ptr = getelementptr inbounds %struct.S, %struct.S addrspace(1)* null, i64 1, i32 0, i64 1, i32 0, i64 %x - %ret = ptrtoint i8 addrspace(1)* %ptr to i64 + %ptr = getelementptr inbounds %struct.S, ptr addrspace(1) null, i64 1, i32 0, i64 1, i32 0, i64 %x + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } @@ -468,9 +468,9 @@ define i64 @fold_ptrtoint_inbounds_nullgep_of_nonzero_inbounds_nullgep() { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_inbounds_nullgep_of_nonzero_inbounds_nullgep() { ; LLPARSER-NEXT: [[NONZERO_OFFSET:%.*]] = add i64 1234, 0 ; LLPARSER-NEXT: [[ZERO_OFFSET:%.*]] = sub i64 [[NONZERO_OFFSET]], 1234 -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* null, i64 [[NONZERO_OFFSET]] -; LLPARSER-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[PTR]], i64 [[ZERO_OFFSET]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i8 addrspace(1)* [[PTR2]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) null, i64 [[NONZERO_OFFSET]] +; LLPARSER-NEXT: [[PTR2:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[PTR]], i64 [[ZERO_OFFSET]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR2]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; CHECK-LABEL: define {{[^@]+}}@fold_ptrtoint_inbounds_nullgep_of_nonzero_inbounds_nullgep() { @@ -478,9 +478,9 @@ define i64 @fold_ptrtoint_inbounds_nullgep_of_nonzero_inbounds_nullgep() { ; %nonzero_offset = add i64 1234, 0 %zero_offset = sub i64 %nonzero_offset, 1234 - %ptr = getelementptr inbounds i8, i8 addrspace(1)* null, i64 %nonzero_offset - %ptr2 = getelementptr inbounds i8, i8 addrspace(1)* %ptr, i64 %zero_offset - %ret = ptrtoint i8 addrspace(1)* %ptr2 to i64 + %ptr = getelementptr inbounds i8, ptr addrspace(1) null, i64 %nonzero_offset + %ptr2 = getelementptr inbounds i8, ptr addrspace(1) %ptr, i64 %zero_offset + %ret = ptrtoint ptr addrspace(1) %ptr2 to i64 ret i64 %ret } @@ -488,14 +488,14 @@ define i64 @fold_ptrtoint_inbounds_nullgep_of_nonzero_inbounds_nullgep() { define i64 @fold_ptrtoint_nullgep_array_one_var_1(i64 %x) { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_array_one_var_1 ; LLPARSER-SAME: (i64 [[X:%.*]]) { -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr [2 x i16], [2 x i16] addrspace(1)* null, i64 [[X]], i64 3 -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr [2 x i16], ptr addrspace(1) null, i64 [[X]], i64 3 +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; INSTSIMPLIFY-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_array_one_var_1 ; INSTSIMPLIFY-SAME: (i64 [[X:%.*]]) { -; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr [2 x i16], [2 x i16] addrspace(1)* null, i64 [[X]], i64 3 -; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 +; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr [2 x i16], ptr addrspace(1) null, i64 [[X]], i64 3 +; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; INSTSIMPLIFY-NEXT: ret i64 [[RET]] ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_array_one_var_1 
@@ -504,22 +504,22 @@ define i64 @fold_ptrtoint_nullgep_array_one_var_1(i64 %x) { ; INSTCOMBINE-NEXT: [[PTR_OFFS:%.*]] = add i64 [[PTR_IDX]], 6 ; INSTCOMBINE-NEXT: ret i64 [[PTR_OFFS]] ; - %ptr = getelementptr [2 x i16], [2 x i16] addrspace(1)* null, i64 %x, i64 3 - %ret = ptrtoint i16 addrspace(1)* %ptr to i64 + %ptr = getelementptr [2 x i16], ptr addrspace(1) null, i64 %x, i64 3 + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } define i64 @fold_ptrtoint_nullgep_array_one_var_2(i64 %x) { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_array_one_var_2 ; LLPARSER-SAME: (i64 [[X:%.*]]) { -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr [2 x i16], [2 x i16] addrspace(1)* null, i64 7, i64 [[X]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr [2 x i16], ptr addrspace(1) null, i64 7, i64 [[X]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; INSTSIMPLIFY-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_array_one_var_2 ; INSTSIMPLIFY-SAME: (i64 [[X:%.*]]) { -; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr [2 x i16], [2 x i16] addrspace(1)* null, i64 7, i64 [[X]] -; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 +; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr [2 x i16], ptr addrspace(1) null, i64 7, i64 [[X]] +; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; INSTSIMPLIFY-NEXT: ret i64 [[RET]] ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@fold_ptrtoint_nullgep_array_one_var_2 @@ -528,22 +528,22 @@ define i64 @fold_ptrtoint_nullgep_array_one_var_2(i64 %x) { ; INSTCOMBINE-NEXT: [[PTR_OFFS:%.*]] = add i64 [[PTR_IDX]], 28 ; INSTCOMBINE-NEXT: ret i64 [[PTR_OFFS]] ; - %ptr = getelementptr [2 x i16], [2 x i16] addrspace(1)* null, i64 7, i64 %x - %ret = ptrtoint i16 addrspace(1)* %ptr to i64 + %ptr = getelementptr [2 x i16], ptr addrspace(1) null, i64 7, i64 %x + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } define i64 @fold_ptrtoint_nested_array_two_vars(i64 %x, i64 %y) { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nested_array_two_vars ; LLPARSER-SAME: (i64 [[X:%.*]], i64 [[Y:%.*]]) { -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr [2 x i16], [2 x i16] addrspace(1)* null, i64 [[X]], i64 [[Y]] -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr [2 x i16], ptr addrspace(1) null, i64 [[X]], i64 [[Y]] +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; INSTSIMPLIFY-LABEL: define {{[^@]+}}@fold_ptrtoint_nested_array_two_vars ; INSTSIMPLIFY-SAME: (i64 [[X:%.*]], i64 [[Y:%.*]]) { -; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr [2 x i16], [2 x i16] addrspace(1)* null, i64 [[X]], i64 [[Y]] -; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 +; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr [2 x i16], ptr addrspace(1) null, i64 [[X]], i64 [[Y]] +; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; INSTSIMPLIFY-NEXT: ret i64 [[RET]] ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@fold_ptrtoint_nested_array_two_vars @@ -554,22 +554,22 @@ define i64 @fold_ptrtoint_nested_array_two_vars(i64 %x, i64 %y) { ; INSTCOMBINE-NEXT: ret i64 [[PTR_OFFS]] ; - %ptr = getelementptr [2 x i16], [2 x i16] addrspace(1)* null, i64 %x, i64 %y - %ret = ptrtoint i16 addrspace(1)* %ptr to i64 + %ptr = getelementptr [2 x i16], ptr addrspace(1) null, i64 %x, 
i64 %y + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } define i64 @fold_ptrtoint_nested_array_two_vars_plus_zero(i64 %x, i64 %y) { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nested_array_two_vars_plus_zero ; LLPARSER-SAME: (i64 [[X:%.*]], i64 [[Y:%.*]]) { -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr [2 x [2 x i16]], [2 x [2 x i16]] addrspace(1)* null, i64 [[X]], i64 [[Y]], i64 0 -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr [2 x [2 x i16]], ptr addrspace(1) null, i64 [[X]], i64 [[Y]], i64 0 +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; INSTSIMPLIFY-LABEL: define {{[^@]+}}@fold_ptrtoint_nested_array_two_vars_plus_zero ; INSTSIMPLIFY-SAME: (i64 [[X:%.*]], i64 [[Y:%.*]]) { -; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr [2 x [2 x i16]], [2 x [2 x i16]] addrspace(1)* null, i64 [[X]], i64 [[Y]], i64 0 -; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 +; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr [2 x [2 x i16]], ptr addrspace(1) null, i64 [[X]], i64 [[Y]], i64 0 +; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; INSTSIMPLIFY-NEXT: ret i64 [[RET]] ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@fold_ptrtoint_nested_array_two_vars_plus_zero @@ -579,22 +579,22 @@ define i64 @fold_ptrtoint_nested_array_two_vars_plus_zero(i64 %x, i64 %y) { ; INSTCOMBINE-NEXT: [[PTR_OFFS:%.*]] = add i64 [[PTR_IDX]], [[PTR_IDX1]] ; INSTCOMBINE-NEXT: ret i64 [[PTR_OFFS]] ; - %ptr = getelementptr [2 x [2 x i16]], [2 x [2 x i16]] addrspace(1)* null, i64 %x, i64 %y, i64 0 - %ret = ptrtoint i16 addrspace(1)* %ptr to i64 + %ptr = getelementptr [2 x [2 x i16]], ptr addrspace(1) null, i64 %x, i64 %y, i64 0 + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } define i64 @fold_ptrtoint_nested_array_two_vars_plus_const(i64 %x, i64 %y) { ; LLPARSER-LABEL: define {{[^@]+}}@fold_ptrtoint_nested_array_two_vars_plus_const ; LLPARSER-SAME: (i64 [[X:%.*]], i64 [[Y:%.*]]) { -; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr [2 x [2 x i16]], [2 x [2 x i16]] addrspace(1)* null, i64 [[X]], i64 [[Y]], i64 1 -; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 +; LLPARSER-NEXT: [[PTR:%.*]] = getelementptr [2 x [2 x i16]], ptr addrspace(1) null, i64 [[X]], i64 [[Y]], i64 1 +; LLPARSER-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; LLPARSER-NEXT: ret i64 [[RET]] ; ; INSTSIMPLIFY-LABEL: define {{[^@]+}}@fold_ptrtoint_nested_array_two_vars_plus_const ; INSTSIMPLIFY-SAME: (i64 [[X:%.*]], i64 [[Y:%.*]]) { -; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr [2 x [2 x i16]], [2 x [2 x i16]] addrspace(1)* null, i64 [[X]], i64 [[Y]], i64 1 -; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 +; INSTSIMPLIFY-NEXT: [[PTR:%.*]] = getelementptr [2 x [2 x i16]], ptr addrspace(1) null, i64 [[X]], i64 [[Y]], i64 1 +; INSTSIMPLIFY-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; INSTSIMPLIFY-NEXT: ret i64 [[RET]] ; ; INSTCOMBINE-LABEL: define {{[^@]+}}@fold_ptrtoint_nested_array_two_vars_plus_const @@ -605,8 +605,8 @@ define i64 @fold_ptrtoint_nested_array_two_vars_plus_const(i64 %x, i64 %y) { ; INSTCOMBINE-NEXT: [[PTR_OFFS2:%.*]] = or i64 [[PTR_OFFS]], 2 ; INSTCOMBINE-NEXT: ret i64 [[PTR_OFFS2]] ; - %ptr = getelementptr [2 x [2 x i16]], [2 x [2 x i16]] addrspace(1)* null, i64 %x, i64 %y, i64 1 - %ret = ptrtoint i16 addrspace(1)* %ptr to i64 + %ptr = getelementptr [2 x [2 x 
i16]], ptr addrspace(1) null, i64 %x, i64 %y, i64 1 + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } @@ -614,15 +614,13 @@ define i64 @fold_ptrtoint_nested_array_two_vars_plus_const(i64 %x, i64 %y) { define i64 @fold_ptrtoint_nested_nullgep_array_variable_multiple_uses(i64 %x, i64 %y) { ; ALL-LABEL: define {{[^@]+}}@fold_ptrtoint_nested_nullgep_array_variable_multiple_uses ; ALL-SAME: (i64 [[X:%.*]], i64 [[Y:%.*]]) { -; ALL-NEXT: [[PTR:%.*]] = getelementptr [2 x i16], [2 x i16] addrspace(1)* null, i64 [[X]], i64 [[Y]] -; ALL-NEXT: [[PTRI8:%.*]] = bitcast i16 addrspace(1)* [[PTR]] to i8 addrspace(1)* -; ALL-NEXT: call void @use_ptr(i8 addrspace(1)* [[PTRI8]]) -; ALL-NEXT: [[RET:%.*]] = ptrtoint i16 addrspace(1)* [[PTR]] to i64 +; ALL-NEXT: [[PTR:%.*]] = getelementptr [2 x i16], ptr addrspace(1) null, i64 [[X]], i64 [[Y]] +; ALL-NEXT: call void @use_ptr(ptr addrspace(1) [[PTR]]) +; ALL-NEXT: [[RET:%.*]] = ptrtoint ptr addrspace(1) [[PTR]] to i64 ; ALL-NEXT: ret i64 [[RET]] ; - %ptr = getelementptr [2 x i16], [2 x i16] addrspace(1)* null, i64 %x, i64 %y - %ptri8 = bitcast i16 addrspace(1)* %ptr to i8 addrspace(1)* - call void @use_ptr(i8 addrspace(1)* %ptri8) - %ret = ptrtoint i16 addrspace(1)* %ptr to i64 + %ptr = getelementptr [2 x i16], ptr addrspace(1) null, i64 %x, i64 %y + call void @use_ptr(ptr addrspace(1) %ptr) + %ret = ptrtoint ptr addrspace(1) %ptr to i64 ret i64 %ret } diff --git a/llvm/test/Transforms/InstCombine/puts-1.ll b/llvm/test/Transforms/InstCombine/puts-1.ll index 30371beb645c7..427b051c8fe41 100644 --- a/llvm/test/Transforms/InstCombine/puts-1.ll +++ b/llvm/test/Transforms/InstCombine/puts-1.ll @@ -7,7 +7,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3 @empty = constant [1 x i8] zeroinitializer -declare i32 @puts(i8*) +declare i32 @puts(ptr) ; Check puts("") -> putchar('\n'). @@ -16,8 +16,7 @@ define void @test_simplify1() { ; CHECK-NEXT: [[PUTCHAR:%.*]] = call i32 @putchar(i32 10) ; CHECK-NEXT: ret void ; - %str = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0 - call i32 @puts(i8* %str) + call i32 @puts(ptr @empty) ret void } @@ -25,10 +24,9 @@ define void @test_simplify1() { define i32 @test_no_simplify1() { ; CHECK-LABEL: @test_no_simplify1( -; CHECK-NEXT: [[RET:%.*]] = call i32 @puts(i8* noundef nonnull dereferenceable(1) getelementptr inbounds ([1 x i8], [1 x i8]* @empty, i32 0, i32 0)) +; CHECK-NEXT: [[RET:%.*]] = call i32 @puts(ptr noundef nonnull dereferenceable(1) @empty) ; CHECK-NEXT: ret i32 [[RET]] ; - %str = getelementptr [1 x i8], [1 x i8]* @empty, i32 0, i32 0 - %ret = call i32 @puts(i8* %str) + %ret = call i32 @puts(ptr @empty) ret i32 %ret } diff --git a/llvm/test/Transforms/InstCombine/select-masked_gather.ll b/llvm/test/Transforms/InstCombine/select-masked_gather.ll index 22d7e71613994..70d798ecd5085 100644 --- a/llvm/test/Transforms/InstCombine/select-masked_gather.ll +++ b/llvm/test/Transforms/InstCombine/select-masked_gather.ll @@ -2,123 +2,123 @@ ; RUN: opt < %s -passes=instcombine -S | FileCheck %s ; Fold zeroing of inactive lanes into the gather's passthrough parameter. 
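; A sketch of the fold these tests exercise, with the vector types written out
; (they are elided elsewhere in this rendering of the diff; the first test is
; assumed to use <vscale x 2 x float> lanes):
;   %gather = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> %ptr, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
;   %masked = select <vscale x 2 x i1> %mask, <vscale x 2 x float> %gather, <vscale x 2 x float> zeroinitializer
; becomes
;   %masked = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> %ptr, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> zeroinitializer)
; The select is redundant afterwards: lanes where %mask is false yield zero
; either way, because inactive lanes read the gather's passthrough (last)
; operand.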
-define @masked_gather_and_zero_inactive_1( %ptr, %mask) { +define @masked_gather_and_zero_inactive_1( %ptr, %mask) { ; CHECK-LABEL: @masked_gather_and_zero_inactive_1( -; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.masked.gather.nxv2f32.nxv2p0f32( [[PTR:%.*]], i32 4, [[MASK:%.*]], zeroinitializer) +; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.masked.gather.nxv2f32.nxv2p0( [[PTR:%.*]], i32 4, [[MASK:%.*]], zeroinitializer) ; CHECK-NEXT: ret [[GATHER]] ; - %gather = call @llvm.masked.gather.nxv2f32( %ptr, i32 4, %mask, undef) + %gather = call @llvm.masked.gather.nxv2f32( %ptr, i32 4, %mask, undef) %masked = select %mask, %gather, zeroinitializer ret %masked } ; As above but reuse the gather's existing passthrough. -define @masked_gather_and_zero_inactive_2( %ptr, %mask) { +define @masked_gather_and_zero_inactive_2( %ptr, %mask) { ; CHECK-LABEL: @masked_gather_and_zero_inactive_2( -; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0i32( [[PTR:%.*]], i32 4, [[MASK:%.*]], zeroinitializer) +; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[PTR:%.*]], i32 4, [[MASK:%.*]], zeroinitializer) ; CHECK-NEXT: ret [[GATHER]] ; - %gather = call @llvm.masked.gather.nxv2i32( %ptr, i32 4, %mask, zeroinitializer) + %gather = call @llvm.masked.gather.nxv2i32( %ptr, i32 4, %mask, zeroinitializer) %masked = select %mask, %gather, zeroinitializer ret %masked } ; No transform when the gather's passthrough cannot be reused or altered. -define @masked_gather_and_zero_inactive_3( %ptr, %mask, %passthrough) { +define @masked_gather_and_zero_inactive_3( %ptr, %mask, %passthrough) { ; CHECK-LABEL: @masked_gather_and_zero_inactive_3( -; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0i32( [[PTR:%.*]], i32 4, [[MASK:%.*]], [[PASSTHROUGH:%.*]]) +; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[PTR:%.*]], i32 4, [[MASK:%.*]], [[PASSTHROUGH:%.*]]) ; CHECK-NEXT: [[MASKED:%.*]] = select [[MASK]], [[GATHER]], zeroinitializer ; CHECK-NEXT: ret [[MASKED]] ; - %gather = call @llvm.masked.gather.nxv2i32( %ptr, i32 4, %mask, %passthrough) + %gather = call @llvm.masked.gather.nxv2i32( %ptr, i32 4, %mask, %passthrough) %masked = select %mask, %gather, zeroinitializer ret %masked } ; Remove redundant select when its mask doesn't overlap with the gather mask. -define @masked_gather_and_zero_inactive_4( %ptr, %inv_mask) { +define @masked_gather_and_zero_inactive_4( %ptr, %inv_mask) { ; CHECK-LABEL: @masked_gather_and_zero_inactive_4( ; CHECK-NEXT: [[MASK:%.*]] = xor [[INV_MASK:%.*]], shufflevector ( insertelement ( undef, i1 true, i32 0), undef, zeroinitializer) -; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0i32( [[PTR:%.*]], i32 4, [[MASK]], zeroinitializer) +; CHECK-NEXT: [[GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[PTR:%.*]], i32 4, [[MASK]], zeroinitializer) ; CHECK-NEXT: ret [[GATHER]] ; %splat = shufflevector insertelement ( undef, i1 true, i32 0), undef, zeroinitializer %mask = xor %inv_mask, %splat - %gather = call @llvm.masked.gather.nxv2i32( %ptr, i32 4, %mask, undef) + %gather = call @llvm.masked.gather.nxv2i32( %ptr, i32 4, %mask, undef) %masked = select %inv_mask, zeroinitializer, %gather ret %masked } ; As above but reuse the gather's existing passthrough. 
-define <vscale x 2 x i32> @masked_gather_and_zero_inactive_5(<vscale x 2 x i32*> %ptr, <vscale x 2 x i1> %inv_mask) {
+define <vscale x 2 x i32> @masked_gather_and_zero_inactive_5(<vscale x 2 x ptr> %ptr, <vscale x 2 x i1> %inv_mask) {
 ; CHECK-LABEL: @masked_gather_and_zero_inactive_5(
 ; CHECK-NEXT: [[MASK:%.*]] = xor <vscale x 2 x i1> [[INV_MASK:%.*]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> [[PTR:%.*]], i32 4, <vscale x 2 x i1> [[MASK]], <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> [[PTR:%.*]], i32 4, <vscale x 2 x i1> [[MASK]], <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT: ret <vscale x 2 x i32> [[GATHER]]
 ;
   %splat = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
   %mask = xor <vscale x 2 x i1> %inv_mask, %splat
-  %gather = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptr, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> zeroinitializer)
+  %gather = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr> %ptr, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> zeroinitializer)
   %masked = select <vscale x 2 x i1> %inv_mask, <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> %gather
   ret <vscale x 2 x i32> %masked
 }

 ; No transform when the gather's passthrough cannot be reused or altered.
-define <vscale x 2 x i32> @masked_gather_and_zero_inactive_6(<vscale x 2 x i32*> %ptr, <vscale x 2 x i1> %inv_mask, <vscale x 2 x i32> %passthrough) {
+define <vscale x 2 x i32> @masked_gather_and_zero_inactive_6(<vscale x 2 x ptr> %ptr, <vscale x 2 x i1> %inv_mask, <vscale x 2 x i32> %passthrough) {
 ; CHECK-LABEL: @masked_gather_and_zero_inactive_6(
 ; CHECK-NEXT: [[MASK:%.*]] = xor <vscale x 2 x i1> [[INV_MASK:%.*]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> [[PTR:%.*]], i32 4, <vscale x 2 x i1> [[MASK]], <vscale x 2 x i32> [[PASSTHROUGH:%.*]])
+; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> [[PTR:%.*]], i32 4, <vscale x 2 x i1> [[MASK]], <vscale x 2 x i32> [[PASSTHROUGH:%.*]])
 ; CHECK-NEXT: [[MASKED:%.*]] = select <vscale x 2 x i1> [[INV_MASK]], <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> [[GATHER]]
 ; CHECK-NEXT: ret <vscale x 2 x i32> [[MASKED]]
 ;
   %splat = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
   %mask = xor <vscale x 2 x i1> %inv_mask, %splat
-  %gather = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptr, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> %passthrough)
+  %gather = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr> %ptr, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> %passthrough)
   %masked = select <vscale x 2 x i1> %inv_mask, <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> %gather
   ret <vscale x 2 x i32> %masked
 }

 ; No transform when select and gather masks have no relation.
-define <vscale x 2 x i32> @masked_gather_and_zero_inactive_7(<vscale x 2 x i32*> %ptr, <vscale x 2 x i1> %mask1, <vscale x 2 x i1> %mask2) {
+define <vscale x 2 x i32> @masked_gather_and_zero_inactive_7(<vscale x 2 x ptr> %ptr, <vscale x 2 x i1> %mask1, <vscale x 2 x i1> %mask2) {
 ; CHECK-LABEL: @masked_gather_and_zero_inactive_7(
-; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0i32(<vscale x 2 x i32*> [[PTR:%.*]], i32 4, <vscale x 2 x i1> [[MASK1:%.*]], <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> [[PTR:%.*]], i32 4, <vscale x 2 x i1> [[MASK1:%.*]], <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT: [[MASKED:%.*]] = select <vscale x 2 x i1> [[MASK2:%.*]], <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> [[GATHER]]
 ; CHECK-NEXT: ret <vscale x 2 x i32> [[MASKED]]
 ;
-  %gather = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*> %ptr, i32 4, <vscale x 2 x i1> %mask1, <vscale x 2 x i32> zeroinitializer)
+  %gather = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr> %ptr, i32 4, <vscale x 2 x i1> %mask1, <vscale x 2 x i32> zeroinitializer)
   %masked = select <vscale x 2 x i1> %mask2, <vscale x 2 x i32> zeroinitializer, <vscale x 2 x i32> %gather
   ret <vscale x 2 x i32> %masked
 }

 ; A more complex case where we can prove the select mask is a subset of the
 ; gather's inactive lanes and thus the gather's passthrough takes effect.
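To spell out the subset argument the comment makes: in the next test the gather executes under %pg = (~%inv_mask) & %cond, so every lane where %inv_mask is set is necessarily inactive in the gather and produces the passthrough. Zeroing those lanes through the select is then indistinguishable from gathering with a zero passthrough, so the select can be dropped once the passthrough is rewritten, which is what the CHECK lines verify.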
-define <vscale x 2 x float> @masked_gather_and_zero_inactive_8(<vscale x 2 x float*> %ptr, <vscale x 2 x i1> %inv_mask, <vscale x 2 x i1> %cond) {
+define <vscale x 2 x float> @masked_gather_and_zero_inactive_8(<vscale x 2 x ptr> %ptr, <vscale x 2 x i1> %inv_mask, <vscale x 2 x i1> %cond) {
 ; CHECK-LABEL: @masked_gather_and_zero_inactive_8(
 ; CHECK-NEXT: [[MASK:%.*]] = xor <vscale x 2 x i1> [[INV_MASK:%.*]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer)
 ; CHECK-NEXT: [[PG:%.*]] = and <vscale x 2 x i1> [[MASK]], [[COND:%.*]]
-; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> [[PTR:%.*]], i32 4, <vscale x 2 x i1> [[PG]], <vscale x 2 x float> zeroinitializer)
+; CHECK-NEXT: [[GATHER:%.*]] = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> [[PTR:%.*]], i32 4, <vscale x 2 x i1> [[PG]], <vscale x 2 x float> zeroinitializer)
 ; CHECK-NEXT: ret <vscale x 2 x float> [[GATHER]]
 ;
   %splat = shufflevector <vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
   %mask = xor <vscale x 2 x i1> %inv_mask, %splat
   %pg = and <vscale x 2 x i1> %mask, %cond
-  %gather = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptr, i32 4, <vscale x 2 x i1> %pg, <vscale x 2 x float> undef)
+  %gather = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x ptr> %ptr, i32 4, <vscale x 2 x i1> %pg, <vscale x 2 x float> undef)
   %masked = select <vscale x 2 x i1> %inv_mask, <vscale x 2 x float> zeroinitializer, <vscale x 2 x float> %gather
   ret <vscale x 2 x float> %masked
 }

-define <vscale x 2 x float> @masked_load_and_scalar_select_cond(<vscale x 2 x float*> %ptr, <vscale x 2 x i1> %mask, i1 %cond) {
+define <vscale x 2 x float> @masked_load_and_scalar_select_cond(<vscale x 2 x ptr> %ptr, <vscale x 2 x i1> %mask, i1 %cond) {
 ; CHECK-LABEL: @masked_load_and_scalar_select_cond(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0f32(<vscale x 2 x float*> [[PTR:%.*]], i32 32, <vscale x 2 x i1> [[MASK:%.*]], <vscale x 2 x float> undef)
+; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32.nxv2p0(<vscale x 2 x ptr> [[PTR:%.*]], i32 32, <vscale x 2 x i1> [[MASK:%.*]], <vscale x 2 x float> undef)
 ; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[COND:%.*]], <vscale x 2 x float> zeroinitializer, <vscale x 2 x float> [[TMP0]]
 ; CHECK-NEXT: ret <vscale x 2 x float> [[TMP1]]
 ;
 entry:
-  %0 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*> %ptr, i32 32, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
+  %0 = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x ptr> %ptr, i32 32, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
   %1 = select i1 %cond, <vscale x 2 x float> zeroinitializer, <vscale x 2 x float> %0
   ret <vscale x 2 x float> %1
 }

-declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x i32*>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
-declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x float*>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
+declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
+declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
diff --git a/llvm/test/Transforms/InstCombine/sincospi.ll b/llvm/test/Transforms/InstCombine/sincospi.ll
index ee83f7709280a..fb2117945eb7b 100644
--- a/llvm/test/Transforms/InstCombine/sincospi.ll
+++ b/llvm/test/Transforms/InstCombine/sincospi.ll
@@ -53,7 +53,7 @@ define float @test_instbased_f32() {
   ret float %res
 }

-define float @test_instbased_f32_other_user(float* %ptr) {
+define float @test_instbased_f32_other_user(ptr %ptr) {
 ; CHECK-FLOAT-IN-VEC-LABEL: @test_instbased_f32_other_user(
 ; CHECK-FLOAT-IN-VEC-NEXT: [[VAL:%.*]] = load float, ptr @var32, align 4
 ; CHECK-FLOAT-IN-VEC-NEXT: [[SINCOSPI:%.*]] = call <2 x float> @__sincospif_stret(float [[VAL]])
@@ -85,7 +85,7 @@ define float @test_instbased_f32_other_user(float* %ptr) {
 ; CHECK-NO-SINCOS-NEXT: ret float [[RES]]
 ;
   %val = load float, ptr @var32
-  store float %val, float* %ptr
+  store float %val, ptr %ptr
   %sin = call float @__sinpif(float %val) #0
   %cos = call float @__cospif(float %val) #0
   %res = fadd float %sin, %cos
diff --git a/llvm/test/Transforms/InstCombine/stdio-custom-dl.ll b/llvm/test/Transforms/InstCombine/stdio-custom-dl.ll
index dfdc8c1327235..cc06be7e759d0 100644
--- a/llvm/test/Transforms/InstCombine/stdio-custom-dl.ll
+++ b/llvm/test/Transforms/InstCombine/stdio-custom-dl.ll
@@ -2,8 +2,8 @@
 ; RUN: opt < %s -passes=instcombine -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
 target datalayout = "e-m:o-p:40:64:64:32-i64:64-f80:128-n8:16:32:64-S128"

-%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
-%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+%struct._IO_FILE = type { i32, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, i32, i32, i64, i16, i8, [1 x i8], ptr, i64, ptr, ptr, ptr, ptr, i64, i32, [20 x i8] }
+%struct._IO_marker = type { ptr, ptr, i32 }
 @.str = private unnamed_addr constant [5 x i8] c"file\00", align 1
 @.str.1 = private unnamed_addr constant [2 x i8] c"w\00", align 1
 @.str.2 = private unnamed_addr constant [4 x i8] c"str\00", align 1
@@ -11,14 +11,14 @@ target datalayout = "e-m:o-p:40:64:64:32-i64:64-f80:128-n8:16:32:64-S128"

 ; Check fwrite is generated with arguments of ptr size, not index size
 define internal void @fputs_test_custom_dl() {
 ; CHECK-LABEL: @fputs_test_custom_dl(
-; CHECK-NEXT: [[CALL:%.*]] = call %struct._IO_FILE* @fopen(i8* nonnull getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i32 0, i32 0), i8* nonnull getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0))
-; CHECK-NEXT: [[TMP1:%.*]] = call i40 @fwrite(i8* nonnull getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i32 0, i32 0), i40 3, i40 1, %struct._IO_FILE* [[CALL]])
+; CHECK-NEXT: [[CALL:%.*]] = call ptr @fopen(ptr nonnull @.str, ptr nonnull @.str.1)
+; CHECK-NEXT: [[TMP1:%.*]] = call i40 @fwrite(ptr nonnull @.str.2, i40 3, i40 1, ptr [[CALL]])
 ; CHECK-NEXT: ret void
 ;
-  %call = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
-  %call1 = call i32 @fputs(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), %struct._IO_FILE* %call)
+  %call = call ptr @fopen(ptr @.str, ptr @.str.1)
+  %call1 = call i32 @fputs(ptr @.str.2, ptr %call)
   ret void
 }

-declare %struct._IO_FILE* @fopen(i8*, i8*)
-declare i32 @fputs(i8* nocapture readonly, %struct._IO_FILE* nocapture)
+declare ptr @fopen(ptr, ptr)
+declare i32 @fputs(ptr nocapture readonly, ptr nocapture)
diff --git a/llvm/test/Transforms/InstCombine/stpncpy-1.ll b/llvm/test/Transforms/InstCombine/stpncpy-1.ll
index b915506f3cb9a..6501ca4c6ff8b 100644
--- a/llvm/test/Transforms/InstCombine/stpncpy-1.ll
+++ b/llvm/test/Transforms/InstCombine/stpncpy-1.ll
@@ -5,9 +5,9 @@
 ; RUN: opt < %s -data-layout="E" -passes=instcombine -S | FileCheck %s --check-prefixes=ANY,BE
 ; RUN: opt < %s -data-layout="e" -passes=instcombine -S | FileCheck %s --check-prefixes=ANY,LE

-declare i8* @stpncpy(i8*, i8*, i64)
+declare ptr @stpncpy(ptr, ptr, i64)

-declare void @sink(i8*, i8*)
+declare void @sink(ptr, ptr)

 @a4 = constant [4 x i8] c"1234"
 @s4 = constant [5 x i8] c"1234\00"
@@ -41,23 +41,23 @@ declare void @sink(i8*, i8*)
 ; ANY: @[[STR_8:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [10 x i8] c"1234\00\00\00\00\00\00", align 1
 ; ANY: @[[STR_9:[a-zA-Z0-9_$"\\.-]+]] = private unnamed_addr constant [10 x i8] c"1234\00\00\00\00\00\00", align 1
 ;.
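A brief refresher before the stpncpy tests, since all of the folds below derive from it: stpncpy(D, S, N) copies at most N bytes from S, pads D with NUL bytes out to exactly N, and returns a pointer to the first NUL it writes into D, or D + N if it writes none. With an empty source the copy degenerates entirely into padding, which is why the s0 tests expect memset. A hedged sketch of that case, using an invented wrapper name (not part of the test file):

define ptr @sketch_stpncpy_empty(ptr %dst, i64 %n) {
  ; stpncpy(dst, "", n): nothing to copy, so just NUL-pad n bytes.
  call void @llvm.memset.p0.i64(ptr %dst, i8 0, i64 %n, i1 false)
  ; The first byte written (if any) is a NUL at dst, so the result is dst.
  ret ptr %dst
}
declare void @llvm.memset.p0.i64(ptr, i8, i64, i1 immarg)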
-define void @fold_stpncpy_overlap(i8* %dst, i64 %n) {
+define void @fold_stpncpy_overlap(ptr %dst, i64 %n) {
 ; ANY-LABEL: @fold_stpncpy_overlap(
-; ANY-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
-; ANY-NEXT: [[STXNCPY_CHAR0:%.*]] = load i8, i8* [[DST]], align 1
+; ANY-NEXT: call void @sink(ptr [[DST:%.*]], ptr [[DST]])
+; ANY-NEXT: [[STXNCPY_CHAR0:%.*]] = load i8, ptr [[DST]], align 1
 ; ANY-NEXT: [[STPNCPY_CHAR0CMP:%.*]] = icmp ne i8 [[STXNCPY_CHAR0]], 0
 ; ANY-NEXT: [[STPNCPY_SEL_IDX:%.*]] = zext i1 [[STPNCPY_CHAR0CMP]] to i64
-; ANY-NEXT: [[STPNCPY_SEL:%.*]] = getelementptr i8, i8* [[DST]], i64 [[STPNCPY_SEL_IDX]]
-; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i8* [[STPNCPY_SEL]])
+; ANY-NEXT: [[STPNCPY_SEL:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STPNCPY_SEL_IDX]]
+; ANY-NEXT: call void @sink(ptr nonnull [[DST]], ptr [[STPNCPY_SEL]])
 ; ANY-NEXT: ret void
 ;
 ; Fold stpncpy(D, D, 0) to just D.
-  %es_0 = call i8* @stpncpy(i8* %dst, i8* %dst, i64 0)
-  call void @sink(i8* %dst, i8* %es_0)
+  %es_0 = call ptr @stpncpy(ptr %dst, ptr %dst, i64 0)
+  call void @sink(ptr %dst, ptr %es_0)

 ; Fold stpncpy(D, D, 1) to D + (*D != '\0').
-  %es_1 = call i8* @stpncpy(i8* %dst, i8* %dst, i64 1)
-  call void @sink(i8* %dst, i8* %es_1)
+  %es_1 = call ptr @stpncpy(ptr %dst, ptr %dst, i64 1)
+  call void @sink(ptr %dst, ptr %es_1)
   ret void
 }

@@ -67,27 +67,27 @@ define void @fold_stpncpy_overlap(i8* %dst, i64 %n) {
 ; when N >= 2.  Such calls are strictly undefined and while simplifying
 ; them to the expected result is possible there is little to gain from it.

-define void @call_stpncpy_overlap(i8* %dst, i64 %n) {
+define void @call_stpncpy_overlap(ptr %dst, i64 %n) {
 ; ANY-LABEL: @call_stpncpy_overlap(
-; ANY-NEXT: [[ES_2:%.*]] = call i8* @stpncpy(i8* noundef nonnull dereferenceable(1) [[DST:%.*]], i8* noundef nonnull dereferenceable(1) [[DST]], i64 2)
-; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[ES_2]])
-; ANY-NEXT: [[ES_3:%.*]] = call i8* @stpncpy(i8* noundef nonnull dereferenceable(1) [[DST]], i8* noundef nonnull dereferenceable(1) [[DST]], i64 3)
-; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[ES_3]])
-; ANY-NEXT: [[ES_N:%.*]] = call i8* @stpncpy(i8* [[DST]], i8* [[DST]], i64 [[N:%.*]])
-; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[ES_N]])
+; ANY-NEXT: [[ES_2:%.*]] = call ptr @stpncpy(ptr noundef nonnull dereferenceable(1) [[DST:%.*]], ptr noundef nonnull dereferenceable(1) [[DST]], i64 2)
+; ANY-NEXT: call void @sink(ptr [[DST]], ptr [[ES_2]])
+; ANY-NEXT: [[ES_3:%.*]] = call ptr @stpncpy(ptr noundef nonnull dereferenceable(1) [[DST]], ptr noundef nonnull dereferenceable(1) [[DST]], i64 3)
+; ANY-NEXT: call void @sink(ptr [[DST]], ptr [[ES_3]])
+; ANY-NEXT: [[ES_N:%.*]] = call ptr @stpncpy(ptr [[DST]], ptr [[DST]], i64 [[N:%.*]])
+; ANY-NEXT: call void @sink(ptr [[DST]], ptr [[ES_N]])
 ; ANY-NEXT: ret void
 ;
 ; Do not transform stpncpy(D, D, 2).
-  %es_2 = call i8* @stpncpy(i8* %dst, i8* %dst, i64 2)
-  call void @sink(i8* %dst, i8* %es_2)
+  %es_2 = call ptr @stpncpy(ptr %dst, ptr %dst, i64 2)
+  call void @sink(ptr %dst, ptr %es_2)

 ; Do not transform stpncpy(D, D, 3).
-  %es_3 = call i8* @stpncpy(i8* %dst, i8* %dst, i64 3)
-  call void @sink(i8* %dst, i8* %es_3)
+  %es_3 = call ptr @stpncpy(ptr %dst, ptr %dst, i64 3)
+  call void @sink(ptr %dst, ptr %es_3)

 ; Do not transform stpncpy(D, D, N).
-  %es_n = call i8* @stpncpy(i8* %dst, i8* %dst, i64 %n)
-  call void @sink(i8* %dst, i8* %es_n)
+  %es_n = call ptr @stpncpy(ptr %dst, ptr %dst, i64 %n)
+  call void @sink(ptr %dst, ptr %es_n)
   ret void
 }

@@ -95,41 +95,40 @@ define void @call_stpncpy_overlap(i8* %dst, i64 %n) {

 ; Verify that stpncpy(D, "", N) calls are transformed to memset(D, 0, N).

-define void @fold_stpncpy_s0(i8* %dst, i64 %n) {
+define void @fold_stpncpy_s0(ptr %dst, i64 %n) {
 ; ANY-LABEL: @fold_stpncpy_s0(
-; ANY-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
-; ANY-NEXT: store i8 0, i8* [[DST]], align 1
-; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
-; ANY-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
-; ANY-NEXT: store i16 0, i16* [[TMP1]], align 1
-; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
-; ANY-NEXT: call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8 0, i64 9, i1 false)
-; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
-; ANY-NEXT: call void @llvm.memset.p0i8.i64(i8* nonnull align 1 [[DST]], i8 0, i64 [[N:%.*]], i1 false)
-; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[DST]])
+; ANY-NEXT: call void @sink(ptr [[DST:%.*]], ptr [[DST]])
+; ANY-NEXT: store i8 0, ptr [[DST]], align 1
+; ANY-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[DST]])
+; ANY-NEXT: store i16 0, ptr [[DST]], align 1
+; ANY-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[DST]])
+; ANY-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(9) [[DST]], i8 0, i64 9, i1 false)
+; ANY-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[DST]])
+; ANY-NEXT: call void @llvm.memset.p0.i64(ptr nonnull align 1 [[DST]], i8 0, i64 [[N:%.*]], i1 false)
+; ANY-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[DST]])
 ; ANY-NEXT: ret void
 ;
-  %ps0 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 4
+  %ps0 = getelementptr [5 x i8], ptr @s4, i32 0, i32 4

 ; Fold stpncpy(D, "", 0) to just D.
-  %es0_0 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 0)
-  call void @sink(i8* %dst, i8* %es0_0)
+  %es0_0 = call ptr @stpncpy(ptr %dst, ptr %ps0, i64 0)
+  call void @sink(ptr %dst, ptr %es0_0)

 ; Transform stpncpy(D, "", 1) to *D = '\0', D.
-  %es0_1 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 1)
-  call void @sink(i8* %dst, i8* %es0_1)
+  %es0_1 = call ptr @stpncpy(ptr %dst, ptr %ps0, i64 1)
+  call void @sink(ptr %dst, ptr %es0_1)

 ; Transform stpncpy(D, "", 2) to memset(D, 0, 2), D.
-  %es0_2 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 2)
-  call void @sink(i8* %dst, i8* %es0_2)
+  %es0_2 = call ptr @stpncpy(ptr %dst, ptr %ps0, i64 2)
+  call void @sink(ptr %dst, ptr %es0_2)

 ; Transform stpncpy(D, "", 9) to memset(D, 0, 9), D.
-  %es0_9 = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 9)
-  call void @sink(i8* %dst, i8* %es0_9)
+  %es0_9 = call ptr @stpncpy(ptr %dst, ptr %ps0, i64 9)
+  call void @sink(ptr %dst, ptr %es0_9)

 ; Transform stpncpy(D, "", n) to memset(D, 0, n), D.
-  %es0_n = call i8* @stpncpy(i8* %dst, i8* %ps0, i64 %n)
-  call void @sink(i8* %dst, i8* %es0_n)
+  %es0_n = call ptr @stpncpy(ptr %dst, ptr %ps0, i64 %n)
+  call void @sink(ptr %dst, ptr %es0_n)
   ret void
 }

@@ -138,63 +137,61 @@ define void @fold_stpncpy_s0(i8* %dst, i64 %n) {

 ; Verify that stpncpy(D, "4", N) calls are transformed to the equivalent
 ; of strncpy(D, "4", N) and the result folded to D + (N != 0).
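The arithmetic behind D + (N != 0): the source "4" here has length 1, so stpncpy returns D + min(1, N), which is 0 for N == 0 and 1 for any larger N. The i16 constants stored in the checks that follow are simply the bytes '4' (0x34) and '\0' laid out in memory order: 13312 is 0x3400 for the big-endian run, while 52 is 0x0034 for the little-endian one.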
-define void @fold_stpncpy_s1(i8* %dst) {
+define void @fold_stpncpy_s1(ptr %dst) {
 ; BE-LABEL: @fold_stpncpy_s1(
-; BE-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
-; BE-NEXT: store i8 52, i8* [[DST]], align 1
-; BE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
-; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
-; BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
-; BE-NEXT: store i16 13312, i16* [[TMP1]], align 1
-; BE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
-; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
-; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(3) getelementptr inbounds ([4 x i8], [4 x i8]* @str.6, i64 0, i64 0), i64 3, i1 false)
-; BE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
-; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
-; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.7, i64 0, i64 0), i64 9, i1 false)
-; BE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
-; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
+; BE-NEXT: call void @sink(ptr [[DST:%.*]], ptr [[DST]])
+; BE-NEXT: store i8 52, ptr [[DST]], align 1
+; BE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; BE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_END]])
+; BE-NEXT: store i16 13312, ptr [[DST]], align 1
+; BE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; BE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR]])
+; BE-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DST]], ptr noundef nonnull align 1 dereferenceable(3) @str.6, i64 3, i1 false)
+; BE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; BE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR1]])
+; BE-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(9) [[DST]], ptr noundef nonnull align 1 dereferenceable(9) @str.7, i64 9, i1 false)
+; BE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; BE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR2]])
 ; BE-NEXT: ret void
 ;
 ; LE-LABEL: @fold_stpncpy_s1(
-; LE-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
-; LE-NEXT: store i8 52, i8* [[DST]], align 1
-; LE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
-; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
-; LE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
-; LE-NEXT: store i16 52, i16* [[TMP1]], align 1
-; LE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
-; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
-; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(3) getelementptr inbounds ([4 x i8], [4 x i8]* @str.6, i64 0, i64 0), i64 3, i1 false)
-; LE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
-; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
-; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.7, i64 0, i64 0), i64 9, i1 false)
-; LE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
-; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
+; LE-NEXT: call void @sink(ptr [[DST:%.*]], ptr [[DST]])
+; LE-NEXT: store i8 52, ptr [[DST]], align 1
+; LE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; LE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_END]])
+; LE-NEXT: store i16 52, ptr [[DST]], align 1
+; LE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; LE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR]])
+; LE-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DST]], ptr noundef nonnull align 1 dereferenceable(3) @str.6, i64 3, i1 false)
+; LE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; LE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR1]])
+; LE-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(9) [[DST]], ptr noundef nonnull align 1 dereferenceable(9) @str.7, i64 9, i1 false)
+; LE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; LE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR2]])
 ; LE-NEXT: ret void
 ;
-  %ps1 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 3
+  %ps1 = getelementptr [5 x i8], ptr @s4, i32 0, i32 3

 ; Fold stpncpy(D, "4", 0) to just D.
-  %es1_0 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 0)
-  call void @sink(i8* %dst, i8* %es1_0)
+  %es1_0 = call ptr @stpncpy(ptr %dst, ptr %ps1, i64 0)
+  call void @sink(ptr %dst, ptr %es1_0)

 ; Transform stpncpy(D, "4", 1) to *D = '4', D + 1.
-  %es1_1 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 1)
-  call void @sink(i8* %dst, i8* %es1_1)
+  %es1_1 = call ptr @stpncpy(ptr %dst, ptr %ps1, i64 1)
+  call void @sink(ptr %dst, ptr %es1_1)

 ; Transform stpncpy(D, "4", 2) to strncpy(D, "4", 2) + 1.
-  %es1_2 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 2)
-  call void @sink(i8* %dst, i8* %es1_2)
+  %es1_2 = call ptr @stpncpy(ptr %dst, ptr %ps1, i64 2)
+  call void @sink(ptr %dst, ptr %es1_2)

 ; Transform stpncpy(D, "4", 3) to strncpy(D, "4", 3) + 1, which is then
 ; transformed to memcpy(D, "4", 2), D[2] = '\0', D + 1.
-  %es1_3 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 3)
-  call void @sink(i8* %dst, i8* %es1_3)
+  %es1_3 = call ptr @stpncpy(ptr %dst, ptr %ps1, i64 3)
+  call void @sink(ptr %dst, ptr %es1_3)

 ; Transform stpncpy(D, "4", 9) to strncpy(D, "4", 9) + 1.
-  %es1_9 = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 9)
-  call void @sink(i8* %dst, i8* %es1_9)
+  %es1_9 = call ptr @stpncpy(ptr %dst, ptr %ps1, i64 9)
+  call void @sink(ptr %dst, ptr %es1_9)
   ret void
 }

@@ -203,74 +200,69 @@ define void @fold_stpncpy_s1(i8* %dst) {

 ; Verify that stpncpy(D, "1234", N) calls are transformed to the equivalent
 ; of strncpy(D, "1234", N) and the result folded to D + min(4, N).
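Same bookkeeping for the four-byte source: stpncpy returns D + min(4, N), and the stored constants below pack the bytes '1' '2' '3' '4' (0x31 0x32 0x33 0x34) in memory order. The two-byte prefix "12" is i16 0x3132 = 12594 on big-endian targets versus 0x3231 = 12849 on little-endian ones, and the whole "1234" is i32 0x31323334 = 825373492 big-endian versus 0x34333231 = 875770417 little-endian.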
-define void @fold_stpncpy_s4(i8* %dst, i64 %n) {
+define void @fold_stpncpy_s4(ptr %dst, i64 %n) {
 ; BE-LABEL: @fold_stpncpy_s4(
-; BE-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
-; BE-NEXT: store i8 49, i8* [[DST]], align 1
-; BE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
-; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
-; BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
-; BE-NEXT: store i16 12594, i16* [[TMP1]], align 1
-; BE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 2
-; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
-; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 3, i1 false)
-; BE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3
-; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
-; BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i32*
-; BE-NEXT: store i32 825373492, i32* [[TMP2]], align 1
-; BE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
-; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
-; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.8, i64 0, i64 0), i64 9, i1 false)
-; BE-NEXT: [[ENDPTR3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
-; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR3]])
+; BE-NEXT: call void @sink(ptr [[DST:%.*]], ptr [[DST]])
+; BE-NEXT: store i8 49, ptr [[DST]], align 1
+; BE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; BE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_END]])
+; BE-NEXT: store i16 12594, ptr [[DST]], align 1
+; BE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2
+; BE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR]])
+; BE-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @s4, i64 3, i1 false)
+; BE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 3
+; BE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR1]])
+; BE-NEXT: store i32 825373492, ptr [[DST]], align 1
+; BE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; BE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR2]])
+; BE-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(9) [[DST]], ptr noundef nonnull align 1 dereferenceable(9) @str.8, i64 9, i1 false)
+; BE-NEXT: [[ENDPTR3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; BE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR3]])
 ; BE-NEXT: ret void
 ;
 ; LE-LABEL: @fold_stpncpy_s4(
-; LE-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
-; LE-NEXT: store i8 49, i8* [[DST]], align 1
-; LE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
-; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
-; LE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
-; LE-NEXT: store i16 12849, i16* [[TMP1]], align 1
-; LE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 2
-; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
-; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 3, i1 false)
-; LE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3
-; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
-; LE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i32*
-; LE-NEXT: store i32 875770417, i32* [[TMP2]], align 1
-; LE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
-; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
-; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.8, i64 0, i64 0), i64 9, i1 false)
-; LE-NEXT: [[ENDPTR3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
-; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR3]])
+; LE-NEXT: call void @sink(ptr [[DST:%.*]], ptr [[DST]])
+; LE-NEXT: store i8 49, ptr [[DST]], align 1
+; LE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; LE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_END]])
+; LE-NEXT: store i16 12849, ptr [[DST]], align 1
+; LE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2
+; LE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR]])
+; LE-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @s4, i64 3, i1 false)
+; LE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 3
+; LE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR1]])
+; LE-NEXT: store i32 875770417, ptr [[DST]], align 1
+; LE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; LE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR2]])
+; LE-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(9) [[DST]], ptr noundef nonnull align 1 dereferenceable(9) @str.8, i64 9, i1 false)
+; LE-NEXT: [[ENDPTR3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; LE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR3]])
 ; LE-NEXT: ret void
 ;
-  %ps4 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 0

 ; Fold stpncpy(D, "1234", 0) to just D.
-  %es4_0 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 0)
-  call void @sink(i8* %dst, i8* %es4_0)
+  %es4_0 = call ptr @stpncpy(ptr %dst, ptr @s4, i64 0)
+  call void @sink(ptr %dst, ptr %es4_0)

 ; Transform stpncpy(D, "1234", 1) to *D = '1', D + 1.
-  %es4_1 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 1)
-  call void @sink(i8* %dst, i8* %es4_1)
+  %es4_1 = call ptr @stpncpy(ptr %dst, ptr @s4, i64 1)
+  call void @sink(ptr %dst, ptr %es4_1)

 ; Transform stpncpy(D, "1234", 2) to strncpy(D, "1234", 2) + 2.
-  %es4_2 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 2)
-  call void @sink(i8* %dst, i8* %es4_2)
+  %es4_2 = call ptr @stpncpy(ptr %dst, ptr @s4, i64 2)
+  call void @sink(ptr %dst, ptr %es4_2)

 ; Transform stpncpy(D, "1234", 3) to strncpy(D, "1234", 3) + 3.
-  %es4_3 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 3)
-  call void @sink(i8* %dst, i8* %es4_3)
+  %es4_3 = call ptr @stpncpy(ptr %dst, ptr @s4, i64 3)
+  call void @sink(ptr %dst, ptr %es4_3)

 ; Transform stpncpy(D, "1234", 4) to strncpy(D, "1234", 4) + 4.
-  %es4_4 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 4)
-  call void @sink(i8* %dst, i8* %es4_4)
+  %es4_4 = call ptr @stpncpy(ptr %dst, ptr @s4, i64 4)
+  call void @sink(ptr %dst, ptr %es4_4)

 ; Transform stpncpy(D, "1234", 9) to strncpy(D, "1234", 9) + 4.
-  %es4_9 = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 9)
-  call void @sink(i8* %dst, i8* %es4_9)
+  %es4_9 = call ptr @stpncpy(ptr %dst, ptr @s4, i64 9)
+  call void @sink(ptr %dst, ptr %es4_9)
   ret void
 }

@@ -279,37 +271,35 @@ define void @fold_stpncpy_s4(i8* %dst, i64 %n) {

 ; Verify that a call to stpncpy(D, A, N) with a constant source larger
 ; than one byte is left alone when N is unknown.

-define void @call_stpncpy_xx_n(i8* %dst, i64 %n) {
+define void @call_stpncpy_xx_n(ptr %dst, i64 %n) {
 ; ANY-LABEL: @call_stpncpy_xx_n(
-; ANY-NEXT: [[EA1_N:%.*]] = call i8* @stpncpy(i8* [[DST:%.*]], i8* nonnull dereferenceable(2) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 3), i64 [[N:%.*]])
-; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[EA1_N]])
-; ANY-NEXT: [[EA4_N:%.*]] = call i8* @stpncpy(i8* [[DST]], i8* nonnull dereferenceable(5) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 0), i64 [[N]])
-; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[EA4_N]])
-; ANY-NEXT: [[ES1_N:%.*]] = call i8* @stpncpy(i8* [[DST]], i8* nonnull dereferenceable(2) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 3), i64 [[N]])
-; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[ES1_N]])
-; ANY-NEXT: [[ES4_N:%.*]] = call i8* @stpncpy(i8* [[DST]], i8* nonnull dereferenceable(5) getelementptr inbounds ([5 x i8], [5 x i8]* @s4, i64 0, i64 0), i64 [[N]])
-; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[ES4_N]])
+; ANY-NEXT: [[EA1_N:%.*]] = call ptr @stpncpy(ptr [[DST:%.*]], ptr nonnull dereferenceable(2) getelementptr inbounds ([4 x i8], ptr @a4, i64 0, i64 3), i64 [[N:%.*]])
+; ANY-NEXT: call void @sink(ptr [[DST]], ptr [[EA1_N]])
+; ANY-NEXT: [[EA4_N:%.*]] = call ptr @stpncpy(ptr [[DST]], ptr nonnull dereferenceable(5) @a4, i64 [[N]])
+; ANY-NEXT: call void @sink(ptr [[DST]], ptr [[EA4_N]])
+; ANY-NEXT: [[ES1_N:%.*]] = call ptr @stpncpy(ptr [[DST]], ptr nonnull dereferenceable(2) getelementptr inbounds ([5 x i8], ptr @s4, i64 0, i64 3), i64 [[N]])
+; ANY-NEXT: call void @sink(ptr [[DST]], ptr [[ES1_N]])
+; ANY-NEXT: [[ES4_N:%.*]] = call ptr @stpncpy(ptr [[DST]], ptr nonnull dereferenceable(5) @s4, i64 [[N]])
+; ANY-NEXT: call void @sink(ptr [[DST]], ptr [[ES4_N]])
 ; ANY-NEXT: ret void
 ;
 ; Do not transform stpncpy(D, A4 + 3, N) when N is unknown.
-  %pa1 = getelementptr [4 x i8], [4 x i8]* @a4, i32 0, i32 3
-  %ea1_n = call i8* @stpncpy(i8* %dst, i8* %pa1, i64 %n)
-  call void @sink(i8* %dst, i8* %ea1_n)
+  %pa1 = getelementptr [4 x i8], ptr @a4, i32 0, i32 3
+  %ea1_n = call ptr @stpncpy(ptr %dst, ptr %pa1, i64 %n)
+  call void @sink(ptr %dst, ptr %ea1_n)

 ; Do not transform stpncpy(D, A4, N) when N is unknown.
-  %pa4 = getelementptr [4 x i8], [4 x i8]* @a4, i32 0, i32 0
-  %ea4_n = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 %n)
-  call void @sink(i8* %dst, i8* %ea4_n)
+  %ea4_n = call ptr @stpncpy(ptr %dst, ptr @a4, i64 %n)
+  call void @sink(ptr %dst, ptr %ea4_n)

 ; Do not transform stpncpy(D, "4", N) when N is unknown.
-  %ps1 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 3
-  %es1_n = call i8* @stpncpy(i8* %dst, i8* %ps1, i64 %n)
-  call void @sink(i8* %dst, i8* %es1_n)
+  %ps1 = getelementptr [5 x i8], ptr @s4, i32 0, i32 3
+  %es1_n = call ptr @stpncpy(ptr %dst, ptr %ps1, i64 %n)
+  call void @sink(ptr %dst, ptr %es1_n)

 ; Likewise, do not transform stpncpy(D, "1234", N) when N is unknown.
-  %ps4 = getelementptr [5 x i8], [5 x i8]* @s4, i32 0, i32 0
-  %es4_n = call i8* @stpncpy(i8* %dst, i8* %ps4, i64 %n)
-  call void @sink(i8* %dst, i8* %es4_n)
+  %es4_n = call ptr @stpncpy(ptr %dst, ptr @s4, i64 %n)
+  call void @sink(ptr %dst, ptr %es4_n)
   ret void
 }

@@ -318,85 +308,80 @@ define void @call_stpncpy_xx_n(i8* %dst, i64 %n) {
 ; source array are transformed to the equivalent strncpy call and the result
 ; folded to D + min(4, N).

-define void @fold_stpncpy_a4(i8* %dst, i64 %n) {
+define void @fold_stpncpy_a4(ptr %dst, i64 %n) {
 ; BE-LABEL: @fold_stpncpy_a4(
-; BE-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
-; BE-NEXT: store i8 49, i8* [[DST]], align 1
-; BE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
-; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
-; BE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
-; BE-NEXT: store i16 12594, i16* [[TMP1]], align 1
-; BE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 2
-; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
-; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 0), i64 3, i1 false)
-; BE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3
-; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
-; BE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i32*
-; BE-NEXT: store i32 825373492, i32* [[TMP2]], align 1
-; BE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
-; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
-; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 0), i64 5, i1 false)
-; BE-NEXT: [[ENDPTR3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
-; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR3]])
-; BE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.9, i64 0, i64 0), i64 9, i1 false)
-; BE-NEXT: [[ENDPTR4:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
-; BE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR4]])
+; BE-NEXT: call void @sink(ptr [[DST:%.*]], ptr [[DST]])
+; BE-NEXT: store i8 49, ptr [[DST]], align 1
+; BE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; BE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_END]])
+; BE-NEXT: store i16 12594, ptr [[DST]], align 1
+; BE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2
+; BE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR]])
+; BE-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @a4, i64 3, i1 false)
+; BE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 3
+; BE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR1]])
+; BE-NEXT: store i32 825373492, ptr [[DST]], align 1
+; BE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; BE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR2]])
+; BE-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @a4, i64 5, i1 false)
+; BE-NEXT: [[ENDPTR3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; BE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR3]])
+; BE-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(9) [[DST]], ptr noundef nonnull align 1 dereferenceable(9) @str.9, i64 9, i1 false)
+; BE-NEXT: [[ENDPTR4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; BE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR4]])
 ; BE-NEXT: ret void
 ;
 ; LE-LABEL: @fold_stpncpy_a4(
-; LE-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
-; LE-NEXT: store i8 49, i8* [[DST]], align 1
-; LE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 1
-; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[STPNCPY_END]])
-; LE-NEXT: [[TMP1:%.*]] = bitcast i8* [[DST]] to i16*
-; LE-NEXT: store i16 12849, i16* [[TMP1]], align 1
-; LE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 2
-; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR]])
-; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(3) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 0), i64 3, i1 false)
-; LE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 3
-; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR1]])
-; LE-NEXT: [[TMP2:%.*]] = bitcast i8* [[DST]] to i32*
-; LE-NEXT: store i32 875770417, i32* [[TMP2]], align 1
-; LE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
-; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR2]])
-; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(5) [[DST]], i8* noundef nonnull align 1 dereferenceable(5) getelementptr inbounds ([4 x i8], [4 x i8]* @a4, i64 0, i64 0), i64 5, i1 false)
-; LE-NEXT: [[ENDPTR3:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
-; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR3]])
-; LE-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(9) [[DST]], i8* noundef nonnull align 1 dereferenceable(9) getelementptr inbounds ([10 x i8], [10 x i8]* @str.9, i64 0, i64 0), i64 9, i1 false)
-; LE-NEXT: [[ENDPTR4:%.*]] = getelementptr inbounds i8, i8* [[DST]], i64 4
-; LE-NEXT: call void @sink(i8* nonnull [[DST]], i8* nonnull [[ENDPTR4]])
+; LE-NEXT: call void @sink(ptr [[DST:%.*]], ptr [[DST]])
+; LE-NEXT: store i8 49, ptr [[DST]], align 1
+; LE-NEXT: [[STPNCPY_END:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 1
+; LE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[STPNCPY_END]])
+; LE-NEXT: store i16 12849, ptr [[DST]], align 1
+; LE-NEXT: [[ENDPTR:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2
+; LE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR]])
+; LE-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(3) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @a4, i64 3, i1 false)
+; LE-NEXT: [[ENDPTR1:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 3
+; LE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR1]])
+; LE-NEXT: store i32 875770417, ptr [[DST]], align 1
+; LE-NEXT: [[ENDPTR2:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; LE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR2]])
+; LE-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(5) [[DST]], ptr noundef nonnull align 1 dereferenceable(5) @a4, i64 5, i1 false)
+; LE-NEXT: [[ENDPTR3:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; LE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR3]])
+; LE-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(9) [[DST]], ptr noundef nonnull align 1 dereferenceable(9) @str.9, i64 9, i1 false)
+; LE-NEXT: [[ENDPTR4:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 4
+; LE-NEXT: call void @sink(ptr nonnull [[DST]], ptr nonnull [[ENDPTR4]])
 ; LE-NEXT: ret void
 ;
-  %pa4 = getelementptr [4 x i8], [4 x i8]* @a4, i32 0, i32 0

 ; Fold stpncpy(D, A4, 0) to just D.
-  %ea4_0 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 0)
-  call void @sink(i8* %dst, i8* %ea4_0)
+  %ea4_0 = call ptr @stpncpy(ptr %dst, ptr @a4, i64 0)
+  call void @sink(ptr %dst, ptr %ea4_0)

 ; Transform stpncpy(D, A4, 1) to *D = '1', D + 1.
-  %ea4_1 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 1)
-  call void @sink(i8* %dst, i8* %ea4_1)
+  %ea4_1 = call ptr @stpncpy(ptr %dst, ptr @a4, i64 1)
+  call void @sink(ptr %dst, ptr %ea4_1)

 ; Transform stpncpy(D, A4, 2) to strncpy(D, A4, 2) + 2.
-  %ea4_2 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 2)
-  call void @sink(i8* %dst, i8* %ea4_2)
+  %ea4_2 = call ptr @stpncpy(ptr %dst, ptr @a4, i64 2)
  call void @sink(ptr %dst, ptr %ea4_2)

 ; Transform stpncpy(D, A4, 3) to strncpy(D, A4, 3) + 3.
-  %ea4_3 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 3)
-  call void @sink(i8* %dst, i8* %ea4_3)
+  %ea4_3 = call ptr @stpncpy(ptr %dst, ptr @a4, i64 3)
+  call void @sink(ptr %dst, ptr %ea4_3)

 ; Transform stpncpy(D, A4, 4) to strncpy(D, A4, 4) + 4.
-  %ea4_4 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 4)
-  call void @sink(i8* %dst, i8* %ea4_4)
+  %ea4_4 = call ptr @stpncpy(ptr %dst, ptr @a4, i64 4)
+  call void @sink(ptr %dst, ptr %ea4_4)

 ; Transform stpncpy(D, A4, 5) to strncpy(D, A4, 5) + 4.
-  %ea4_5 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 5)
-  call void @sink(i8* %dst, i8* %ea4_5)
+  %ea4_5 = call ptr @stpncpy(ptr %dst, ptr @a4, i64 5)
+  call void @sink(ptr %dst, ptr %ea4_5)

 ; Transform stpncpy(D, A4, 9) to strncpy(D, A4, 9) + 4.
-  %ea4_9 = call i8* @stpncpy(i8* %dst, i8* %pa4, i64 9)
-  call void @sink(i8* %dst, i8* %ea4_9)
+  %ea4_9 = call ptr @stpncpy(ptr %dst, ptr @a4, i64 9)
+  call void @sink(ptr %dst, ptr %ea4_9)
   ret void
 }

@@ -406,24 +391,24 @@ define void @fold_stpncpy_a4(i8* %dst, i64 %n) {
 ; the equivalent of strncpy and either folded to D if N == 0 or to
 ; *D ? D + 1 : D otherwise.
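The *D ? D + 1 : D result the comment describes shows up in the CHECK lines below as a load, a compare, a zext, and a getelementptr. Pulled out into a standalone hedged sketch (invented function name, not part of the test file), the N == 1 fold for an unknown source is:

define ptr @sketch_stpncpy_unknown_src_n1(ptr %dst, ptr %src) {
  ; Copy the single byte that fits within the bound.
  %c0 = load i8, ptr %src, align 1
  store i8 %c0, ptr %dst, align 1
  ; The end pointer is dst + 1 if that byte was non-NUL, else dst.
  %nz = icmp ne i8 %c0, 0
  %off = zext i1 %nz to i64
  %end = getelementptr i8, ptr %dst, i64 %off
  ret ptr %end
}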
-define void @fold_stpncpy_s(i8* %dst, i8* %src) {
+define void @fold_stpncpy_s(ptr %dst, ptr %src) {
 ; ANY-LABEL: @fold_stpncpy_s(
-; ANY-NEXT: call void @sink(i8* [[DST:%.*]], i8* [[DST]])
-; ANY-NEXT: [[STXNCPY_CHAR0:%.*]] = load i8, i8* [[SRC:%.*]], align 1
-; ANY-NEXT: store i8 [[STXNCPY_CHAR0]], i8* [[DST]], align 1
+; ANY-NEXT: call void @sink(ptr [[DST:%.*]], ptr [[DST]])
+; ANY-NEXT: [[STXNCPY_CHAR0:%.*]] = load i8, ptr [[SRC:%.*]], align 1
+; ANY-NEXT: store i8 [[STXNCPY_CHAR0]], ptr [[DST]], align 1
 ; ANY-NEXT: [[STPNCPY_CHAR0CMP:%.*]] = icmp ne i8 [[STXNCPY_CHAR0]], 0
 ; ANY-NEXT: [[STPNCPY_SEL_IDX:%.*]] = zext i1 [[STPNCPY_CHAR0CMP]] to i64
-; ANY-NEXT: [[STPNCPY_SEL:%.*]] = getelementptr i8, i8* [[DST]], i64 [[STPNCPY_SEL_IDX]]
-; ANY-NEXT: call void @sink(i8* nonnull [[DST]], i8* [[STPNCPY_SEL]])
+; ANY-NEXT: [[STPNCPY_SEL:%.*]] = getelementptr i8, ptr [[DST]], i64 [[STPNCPY_SEL_IDX]]
+; ANY-NEXT: call void @sink(ptr nonnull [[DST]], ptr [[STPNCPY_SEL]])
 ; ANY-NEXT: ret void
 ;
 ; Fold stpncpy(D, S, 0) to just D.
-  %es_0 = call i8* @stpncpy(i8* %dst, i8* %src, i64 0)
-  call void @sink(i8* %dst, i8* %es_0)
+  %es_0 = call ptr @stpncpy(ptr %dst, ptr %src, i64 0)
+  call void @sink(ptr %dst, ptr %es_0)

 ; Transform stpncpy(D, S, 1) to *D = *S, D + (*S != '\0').
-  %es_1 = call i8* @stpncpy(i8* %dst, i8* %src, i64 1)
-  call void @sink(i8* %dst, i8* %es_1)
+  %es_1 = call ptr @stpncpy(ptr %dst, ptr %src, i64 1)
+  call void @sink(ptr %dst, ptr %es_1)
   ret void
 }

@@ -440,25 +425,25 @@ define void @fold_stpncpy_s(i8* %dst, i8* %src) {

 ; Also verify that the arguments of the call are annotated with the right
 ; attributes.

-define void @call_stpncpy_s(i8* %dst, i8* %src, i64 %n) {
+define void @call_stpncpy_s(ptr %dst, ptr %src, i64 %n) {
 ; ANY-LABEL: @call_stpncpy_s(
-; ANY-NEXT: [[ES_2:%.*]] = call i8* @stpncpy(i8* noundef nonnull dereferenceable(1) [[DST:%.*]], i8* noundef nonnull dereferenceable(1) [[SRC:%.*]], i64 2)
-; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[ES_2]])
-; ANY-NEXT: [[ES_N:%.*]] = call i8* @stpncpy(i8* [[DST]], i8* [[SRC]], i64 [[N:%.*]])
-; ANY-NEXT: call void @sink(i8* [[DST]], i8* [[ES_N]])
+; ANY-NEXT: [[ES_2:%.*]] = call ptr @stpncpy(ptr noundef nonnull dereferenceable(1) [[DST:%.*]], ptr noundef nonnull dereferenceable(1) [[SRC:%.*]], i64 2)
+; ANY-NEXT: call void @sink(ptr [[DST]], ptr [[ES_2]])
+; ANY-NEXT: [[ES_N:%.*]] = call ptr @stpncpy(ptr [[DST]], ptr [[SRC]], i64 [[N:%.*]])
+; ANY-NEXT: call void @sink(ptr [[DST]], ptr [[ES_N]])
 ; ANY-NEXT: ret void
 ;
 ; Do not transform stpncpy(D, S, 2).  Both *D and *S must be dereferenceable
 ; but neither D[1] nor S[1] need be.
-  %es_2 = call i8* @stpncpy(i8* %dst, i8* %src, i64 2)
-  call void @sink(i8* %dst, i8* %es_2)
+  %es_2 = call ptr @stpncpy(ptr %dst, ptr %src, i64 2)
+  call void @sink(ptr %dst, ptr %es_2)

 ; Do not transform stpncpy(D, S, N).  Both D and S must be nonnull but
 ; neither *D nor *S need be dereferenceable.
 ; TODO: Both D and S should be annotated nonnull and noundef regardless
 ; of the value of N.  See https://reviews.llvm.org/D124633.
-  %es_n = call i8* @stpncpy(i8* %dst, i8* %src, i64 %n)
-  call void @sink(i8* %dst, i8* %es_n)
+  %es_n = call ptr @stpncpy(ptr %dst, ptr %src, i64 %n)
+  call void @sink(ptr %dst, ptr %es_n)
   ret void
 }