diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
index ff5880819020d..bdd4397a92d97 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
@@ -1058,17 +1058,3 @@ define amdgpu_ps i16 @saddo_i16_sv(i16 inreg %a, i16 %b) {
   %ret = add i16 %add, %of.zext
   ret i16 %ret
 }
-
-declare {i7, i1} @llvm.uadd.with.overflow.i7(i7 %a, i7 %b)
-declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
-declare {i16, i1} @llvm.uadd.with.overflow.i16(i16 %a, i16 %b)
-declare {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
-declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
-declare {<2 x i32>, <2 x i1>} @llvm.uadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b)
-
-declare {i7, i1} @llvm.sadd.with.overflow.i7(i7 %a, i7 %b)
-declare {i8, i1} @llvm.sadd.with.overflow.i8(i8 %a, i8 %b)
-declare {i16, i1} @llvm.sadd.with.overflow.i16(i16 %a, i16 %b)
-declare {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
-declare {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
-declare {<2 x i32>, <2 x i1>} @llvm.sadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
index 220dc70165e87..23d4acb2e7d22 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomic_optimizations_mul_one.ll
@@ -3,16 +3,6 @@
 ; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-atomic-optimizer %s | FileCheck -check-prefix=IR %s
 ; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32 immarg)
-declare i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32, i32 immarg)
-declare i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.format.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32 immarg)
-
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.sub.i32(i32, ptr addrspace (8), i32, i32, i32, i32 immarg)
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.xor.i32(i32, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32 immarg)
-
 
 define amdgpu_cs void @atomic_add(<4 x i32> inreg %arg)  {
 ; IR-LABEL: define amdgpu_cs void @atomic_add(
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
index b04bc04ab2269..77ca4a3fc63b3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_udec_wrap.ll
@@ -11,8 +11,6 @@
 @lds0 = internal addrspace(3) global [512 x i32] undef
 @lds1 = internal addrspace(3) global [512 x i64] undef, align 8
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #1 {
 ; CI-LABEL: lds_atomic_dec_ret_i32:
 ; CI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
index f6a997fb0fb01..3557b9cc82d85 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/atomicrmw_uinc_wrap.ll
@@ -11,8 +11,6 @@
 @lds0 = internal addrspace(3) global [512 x i32] undef, align 4
 @lds1 = internal addrspace(3) global [512 x i64] undef, align 8
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #1 {
 ; CI-LABEL: lds_atomic_inc_ret_i32:
 ; CI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bitcast_38_i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bitcast_38_i16.ll
index 5bea13af1649a..9ffe65886ac8b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bitcast_38_i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bitcast_38_i16.ll
@@ -78,7 +78,6 @@ bb:
   call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %i5, i32 0, i32 0, i32 0, <8 x i32> zeroinitializer, i32 0, i32 0)
   ret void
 }
-declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX10PLUS: {{.*}}
 ; GPRIDX: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
index 132dc876b3b05..dc51c5a491f5c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bswap.ll
@@ -677,14 +677,5 @@ define i64 @v_bswap_i48(i64 %src) {
   ret i64 %zext
 }
 
-declare i16 @llvm.bswap.i16(i16) #1
-declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) #1
-declare <3 x i16> @llvm.bswap.v3i16(<3 x i16>) #1
-declare i32 @llvm.bswap.i32(i32) #1
-declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) #1
-declare i64 @llvm.bswap.i64(i64) #1
-declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) #1
-declare i48 @llvm.bswap.i48(i48) #1
-
 attributes #0 = { convergent nounwind readnone }
 attributes #1 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll
index 0816eae28f614..a57a5523ac6f8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-no-rtn.ll
@@ -402,9 +402,3 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr
   %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret void
 }
-
-declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg)
-declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
-
-declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll
index c0b84c914ce5c..511aca325cca6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f32-rtn.ll
@@ -425,9 +425,3 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_bothen_rtn(float %val, ptr ad
   %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret float %ret
 }
-
-declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg)
-declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
-
-declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll
index 7c4069b4b3138..cfd5329c3b695 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.f64.ll
@@ -389,9 +389,3 @@ define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_bothen_rtn(double %val, ptr
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret double %ret
 }
-
-declare double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
-
-declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
-declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
index 9514bea86e4d1..4536cb3248004 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll
@@ -278,9 +278,3 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %va
   %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret void
 }
-
-declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
-
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll
index 632ad55fdf892..be74977adc784 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-rtn.ll
@@ -163,9 +163,3 @@ define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_bothen_rtn(<2 x half>
   %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret <2 x half> %ret
 }
-
-declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
-
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-schedule.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-schedule.ll
index e5aa82278a830..2aa19af36a1b6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-schedule.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-schedule.ll
@@ -39,14 +39,6 @@ define amdgpu_cs void @test1_ptrs_reorderable(ptr addrspace(8) inreg %buf, i32 %
   ret void
 }
 
-declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) #2
-
-declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) #3
-
-declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) nocapture, i32, i32, i32) #4
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture, i32, i32, i32) #5
-
 attributes #2 = { nounwind readonly }
 attributes #3 = { nounwind writeonly }
 attributes #4 = { nounwind memory(argmem: read) }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
index 24652982c6584..9ea9865e675d2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
@@ -449,7 +449,5 @@ define void @func_caller_byval(ptr addrspace(5) %argptr) {
   ret void
 }
 
-declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32, i1 immarg) #1
-
 attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
 attributes #1 = { argmemonly nofree nounwind willreturn writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
index c7676e9da6f49..38602d8b266b0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll
@@ -189,10 +189,6 @@ define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2
   ret float %fmed
 }
 
-declare half @llvm.amdgcn.fmed3.f16(half, half, half)
-declare float @llvm.amdgcn.fmed3.f32(float, float, float)
-declare float @llvm.minnum.f32(float, float)
-
 attributes #0 = {"amdgpu-ieee"="true"}
 attributes #1 = {"amdgpu-ieee"="false"}
 attributes #2 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true"}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
index ca0047bba6c4b..420c99d1462ea 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll
@@ -402,15 +402,6 @@ define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
   %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
   ret float %fmed
 }
-
-declare half @llvm.minnum.f16(half, half)
-declare half @llvm.maxnum.f16(half, half)
-declare float @llvm.minnum.f32(float, float)
-declare float @llvm.maxnum.f32(float, float)
-declare double @llvm.minnum.f64(double, double)
-declare double @llvm.maxnum.f64(double, double)
-declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
-declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
 attributes #0 = {"amdgpu-ieee"="true"}
 attributes #1 = {"amdgpu-ieee"="false"}
 attributes #2 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true"}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
index b9846a6a555dc..22aabd58d961d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-ext-fma.ll
@@ -441,9 +441,4 @@ define amdgpu_vs <4 x float> @test_v4f16_v4f32_add_ext_fma_mul_rhs(<4 x float> %
     ret <4 x float> %d
 }
 
-declare float @llvm.fmuladd.f32(float, float, float) #0
-declare half @llvm.fmuladd.f16(half, half, half) #0
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
-declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) #0
-
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll
index 362e25fa932fc..2c3ec93562127 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-fma-mul.ll
@@ -1034,11 +1034,4 @@ define <4 x double> @test_f64_add_mul_rhs(<4 x double> %a, <4 x double> %b, <4 x
   %z = fadd fast <4 x double> %e, %y
   ret <4 x double> %z
 }
-
-declare <4 x double> @llvm.fmuladd.v4f64(<4 x double>, <4 x double>, <4 x double>) #0
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
-declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) #0
-declare double @llvm.fmuladd.f64(double, double, double) #0
-declare float @llvm.fmuladd.f32(float, float, float) #0
-declare half @llvm.fmuladd.f16(half, half, half) #0
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rsq.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rsq.ll
index ecf3b229170d7..7b7ecd8537b4b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rsq.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rsq.ll
@@ -216,6 +216,3 @@ define amdgpu_cs float @sqrt_rcp_contract(float inreg %arg1) {
   %b = call contract float @llvm.sqrt.f32(float %a)
   ret float %b
 }
-
-declare float @llvm.sqrt.f32(float)
-declare float @llvm.amdgcn.rcp.f32(float)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll
index 621394fd290b0..a603b3f314c64 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll
@@ -3,9 +3,6 @@
 ; RUN: llc -global-isel -mcpu=gfx1010 -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s
 ; RUN: llc -global-isel -mcpu=gfx1100 -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s
 
-declare i64 @llvm.smax.i64(i64, i64)
-declare i64 @llvm.smin.i64(i64, i64)
-
 ; GCN-LABEL: {{^}}v_clamp_i64_i16
 ; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
 ; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll
index b60f4c1250264..cc505a5c790f7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll
@@ -254,10 +254,5 @@ define amdgpu_ps float @div_scale_s_s_false(float inreg %src0, float inreg %src1
   ret float %result
 }
 
-declare float @llvm.fma.f32(float, float, float) #0
-declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) #1
-declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1 immarg) #1
-declare i1 @llvm.amdgcn.class.f32(float, i32) #1
-
 attributes #0 = { nounwind readnone speculatable willreturn }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
index 02781e763f44a..036acd2e4f1bf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
 ; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
-
 define float @v_uitofp_i32_to_f32_mask255(i32 %arg0) nounwind {
 ; SI-LABEL: v_uitofp_i32_to_f32_mask255:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
index d4d5cb18bbd30..ab118625d3a4f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
@@ -325,18 +325,13 @@ define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3
 }
 
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32)
 
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none)
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32)
 
 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare i64 @llvm.amdgcn.s.getpc()
 
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(read)
-declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg)
 
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
-declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg)
 
 attributes #0 = { nounwind memory(readwrite) "amdgpu-flat-work-group-size"="1,1" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
index c25b0f2128266..e70089fa4ba77 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -250,5 +250,3 @@ bb4:
 bb9:
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
index 56bd7ddde6f52..6d27e29c3f578 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dropped_debug_info_assert.ll
@@ -72,8 +72,6 @@ entry:
   ret void
 }
 
-declare ptr @llvm.returnaddress(i32 immarg) #0
-
 attributes #0 = { nofree nosync nounwind readnone willreturn }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll
index 5dae7885f6bfb..dd4b5c3274e1a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/dynamic-alloca-divergent.ll
@@ -23,6 +23,4 @@ define void @func_dynamic_stackalloc_vgpr_align4(i32 %n) {
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
index 89cd18ad9be70..7b49ed0f88267 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
@@ -4148,11 +4148,6 @@ define <2 x half> @v_neg_rsq_v2f16(<2 x half> %a) {
   ret <2 x half> %fdiv
 }
 
-declare half @llvm.fabs.f16(half)
-declare half @llvm.sqrt.f16(half)
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
-declare <2 x half> @llvm.sqrt.v2f16(<2 x half>)
-
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX10-FLUSH: {{.*}}
 ; GFX10-IEEE: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
index aa9ebb9226cdd..c9563534fbec3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
@@ -111,6 +111,4 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data
   ret float %ret
 }
 
-declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr, float)
-
 attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
index 68d8e3d747b86..635574e230b31 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
@@ -82,6 +82,4 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %da
   ret double %ret
 }
 
-declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr, double)
-
 attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll
index feebe791ddec7..6793ebc448218 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.v2f16.ll
@@ -31,5 +31,3 @@ define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn_intrinsic(ptr %ptr, <2 x
   %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr %ptr, <2 x half> %data)
   ret <2 x half> %ret
 }
-
-declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr, <2 x half>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
index 63e7339d829e1..aa8f6776dbe78 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll
@@ -1496,5 +1496,3 @@ bb:
   %load = load volatile <4 x i32>, ptr addrspace(5) %arg, align 1
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll
index 34635b077cd92..6616e4533f424 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/floor.f64.ll
@@ -286,8 +286,5 @@ define amdgpu_ps <2 x float> @s_floor_f64_fneg_fabs(double inreg %x) {
   ret <2 x float> %cast
 }
 
-declare double @llvm.floor.f64(double) #0
-declare double @llvm.fabs.f64(double) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
 attributes #1 = { "amdgpu-ieee"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
index 99e6c5d06a0e1..58c172490604b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll
@@ -1019,18 +1019,4 @@ define amdgpu_ps float @dont_crash_after_fma_mix_select_attempt(float inreg %x,
   ret float %fma
 }
 
-declare half @llvm.fma.f16(half, half, half) #0
-declare float @llvm.fma.f32(float, float, float) #0
-declare double @llvm.fma.f64(double, double, double) #0
-
-declare half @llvm.fabs.f16(half) #0
-declare float @llvm.fabs.f32(float) #0
-
-declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) #0
-declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0
-
-declare <3 x half> @llvm.fma.v3f16(<3 x half>, <3 x half>, <3 x half>) #0
-declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
index 75c4cd53e3bfc..5994459cf799d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll
@@ -549,15 +549,6 @@ define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
   %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0)
   ret float %fmed
 }
-
-declare half @llvm.minnum.f16(half, half)
-declare half @llvm.maxnum.f16(half, half)
-declare float @llvm.minnum.f32(float, float)
-declare float @llvm.maxnum.f32(float, float)
-declare double @llvm.minnum.f64(double, double)
-declare double @llvm.maxnum.f64(double, double)
-declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
-declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
 attributes #0 = {"amdgpu-ieee"="true"}
 attributes #1 = {"amdgpu-ieee"="false"}
 attributes #2 = {"no-nans-fp-math"="true"}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll
index fab9487551697..55e6d07b29405 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp-atomics-gfx940.ll
@@ -1,14 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 -global-isel -verify-machineinstrs | FileCheck %s -check-prefix=GFX940
 
-declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data)
-declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %ptr, <2 x half> %data)
-
 ; bf16 atomics use v2i16 argument since there is no bf16 data type in the llvm.
-declare <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %ptr, <2 x i16> %data)
-declare <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1) %ptr, <2 x i16> %data)
-declare <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32, i32, i1)
-declare <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data)
 
 define amdgpu_kernel void @flat_atomic_fadd_f32_noret(ptr %ptr, float %data) {
 ; GFX940-LABEL: flat_atomic_fadd_f32_noret:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
index 255c6dedbd6e1..10fc46bc5296f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
@@ -2,26 +2,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX90A
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX940
 
-declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
-declare double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
-declare double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
-declare double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
-declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
-declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
-declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data)
-declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data)
-declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data)
-declare double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) nocapture, double, i32, i32, i1)
-
 define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) {
 ; GFX90A-LABEL: raw_buffer_atomic_add_noret_f64:
 ; GFX90A:       ; %bb.0: ; %main_body
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
index 0577117e9d9e1..01201e8cced66 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fpow.ll
@@ -1811,13 +1811,3 @@ define float @v_pow_f32_fneg_rhs(float %x, float %y) {
   %pow = call float @llvm.pow.f32(float %x, float %neg.y)
   ret float %pow
 }
-
-declare half @llvm.pow.f16(half, half)
-declare float @llvm.pow.f32(float, float)
-declare double @llvm.pow.f64(double, double)
-
-declare half @llvm.fabs.f16(half)
-declare float @llvm.fabs.f32(float)
-
-declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
-declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
index f9b98059be0b3..cd91788961531 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
@@ -8051,35 +8051,4 @@ define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
   ret <2 x i128> %result
 }
 
-declare i7 @llvm.fshl.i7(i7, i7, i7) #0
-declare i8 @llvm.fshl.i8(i8, i8, i8) #0
-declare <2 x i8> @llvm.fshl.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0
-declare <4 x i8> @llvm.fshl.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0
-
-declare i16 @llvm.fshl.i16(i16, i16, i16) #0
-declare <2 x i16> @llvm.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.fshl.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.fshl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0
-declare <5 x i16> @llvm.fshl.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0
-declare <6 x i16> @llvm.fshl.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0
-declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0
-
-declare i24 @llvm.fshl.i24(i24, i24, i24) #0
-declare <2 x i24> @llvm.fshl.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0
-
-declare i32 @llvm.fshl.i32(i32, i32, i32) #0
-declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.fshl.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0
-declare <5 x i32> @llvm.fshl.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0
-declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0
-
-declare i48 @llvm.fshl.i48(i48, i48, i48) #0
-
-declare i64 @llvm.fshl.i64(i64, i64, i64) #0
-declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0
-
-declare i128 @llvm.fshl.i128(i128, i128, i128) #0
-declare <2 x i128> @llvm.fshl.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
index c8455665e7b40..8a4b9d4380dc9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
@@ -8120,35 +8120,4 @@ define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %a
   ret <2 x i128> %result
 }
 
-declare i7 @llvm.fshr.i7(i7, i7, i7) #0
-declare i8 @llvm.fshr.i8(i8, i8, i8) #0
-declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0
-declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0
-
-declare i16 @llvm.fshr.i16(i16, i16, i16) #0
-declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0
-declare <5 x i16> @llvm.fshr.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0
-declare <6 x i16> @llvm.fshr.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0
-declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0
-
-declare i24 @llvm.fshr.i24(i24, i24, i24) #0
-declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0
-
-declare i32 @llvm.fshr.i32(i32, i32, i32) #0
-declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0
-declare <5 x i32> @llvm.fshr.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0
-declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0
-
-declare i48 @llvm.fshr.i48(i48, i48, i48) #0
-
-declare i64 @llvm.fshr.i64(i64, i64, i64) #0
-declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0
-
-declare i128 @llvm.fshr.i128(i128, i128, i128) #0
-declare <2 x i128> @llvm.fshr.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
index 21832dc320e42..e81fce1fa22c5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-no-rtn.ll
@@ -218,7 +218,4 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
   ret void
 }
 
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-
 attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
index e48d281f37c9a..881463baf6eb6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f32-rtn.ll
@@ -239,7 +239,4 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
   ret float %ret
 }
 
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-
 attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
index b058ad1023e13..8cb72f07cd654 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.f64.ll
@@ -248,7 +248,4 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
   ret double %ret
 }
 
-declare double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1), double)
-declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1), double)
-
 attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll
index c71beff4dc5c1..ea50d1712be0c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-no-rtn.ll
@@ -110,6 +110,3 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic(ptr
   %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data)
   ret void
 }
-
-declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)
-declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll
index 9f05ecc4cf430..1914053389ef5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-atomic-fadd.v2f16-rtn.ll
@@ -67,6 +67,3 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_flat_intrinsic(p
   %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data)
   ret <2 x half> %ret
 }
-
-declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)
-declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll
index 21fa4afb374cd..0cec4a0a76bba 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/hip.extern.shared.array.ll
@@ -131,5 +131,3 @@ define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) {
   store double %val2, ptr addrspace(3) %arrayidx2, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/i1-copy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/i1-copy.ll
index 2d95008e3fd2d..3dd2a779f34ef 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/i1-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/i1-copy.ll
@@ -55,5 +55,3 @@ main_body:
   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %a, float %b, float undef, float undef, i1 true, i1 true)
   ret void
 }
-
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
index 10cbc56cc5fbe..38ae99727cc16 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/image-waterfall-loop-O0.ll
@@ -198,6 +198,4 @@ bb:
   ret <4 x float> %ret
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
index 8859ac69923a9..70955d294fbf7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
@@ -360,14 +360,5 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr)  {
   ret void
 }
 
-declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr()
-declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-declare i64 @llvm.amdgcn.dispatch.id()
-declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-declare i1 @llvm.amdgcn.is.shared(ptr)
-declare i1 @llvm.amdgcn.is.private(ptr)
-declare void @llvm.trap()
-declare void @llvm.debugtrap()
-
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll
index 3abc21f812e14..086e5cb1993ad 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.large.ll
@@ -158,7 +158,5 @@ define amdgpu_kernel void @v_insert_v64i32_37(ptr addrspace(1) %ptr.in, ptr addr
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-waves-per-eu"="1,10" }
 attributes #1 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir
index 6a89085d020b6..c325d9cf8f138 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.reloc.constant.mir
@@ -6,8 +6,6 @@
   define void @reloc_constant_sgpr32() { ret void }
   define void @reloc_constant_vgpr32() { ret void }
 
-  declare i32 @llvm.amdgcn.reloc.constant(metadata)
-
   !0 = !{!"arst"}
 
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll
index 3438cbdd476d8..f4b31247ad762 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-cs-chain.ll
@@ -4,7 +4,6 @@
 
 declare amdgpu_cs_chain void @callee(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 })
 declare amdgpu_cs_chain_preserve void @callee_preserve(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 })
-declare void @llvm.amdgcn.cs.chain(ptr, i32, <3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 }, i32, ...) noreturn
 
 define amdgpu_cs_chain void @chain_call(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) {
   ; GFX11-LABEL: name: chain_call
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll
index 2e950117c740b..940ee8b457da1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgcn-sendmsg.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -mtriple=amdgcn -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - | FileCheck %s
 
-declare void @llvm.amdgcn.s.sendmsg(i32 immarg, i32)
-
 define amdgpu_ps void @test_sendmsg(i32 inreg %m0) {
   ; CHECK-LABEL: name: test_sendmsg
   ; CHECK: bb.1 (%ir-block.0):
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
index e9292f4e34dcd..eddf8dfb740da 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
@@ -119,8 +119,4 @@ define amdgpu_kernel void @workgroup_id_xz() {
   ret void
 }
 
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
index e98f298adf58e..8485b7b9b65e6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
@@ -152,7 +152,5 @@ define amdgpu_ps <2 x i16> @sgpr_return_v2i16(<2 x i16> %vgpr) {
   ret <2 x i16> %vgpr
 }
 
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg)  #0
-
 attributes #0 = { nounwind }
 attributes #1 = { "InitialPSInputAddr"="0x00002" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
index 875b725a3c76c..f7a083afddd75 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll
@@ -103,6 +103,4 @@ define amdgpu_vs i32 @non_void_ret() {
   ret i32 0
 }
 
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)  #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
index a5f59b15c11b8..2a57632ca37ac 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-implicit-args.ll
@@ -1214,10 +1214,6 @@ define amdgpu_kernel void @test_only_workitem_id_xz() #0 !reqd_work_group_size !
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workitem.id.y() #1
-declare i32 @llvm.amdgcn.workitem.id.z() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable willreturn }
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll
index 3a31ab4ab9d0a..0414858446119 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-constrained-fp.ll
@@ -215,15 +215,5 @@ define float @v_constained_sqrt_f32_fpexcept_strict(float %x) #0 {
   ret float %val
 }
 
-declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) #1
-declare <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float>, metadata, metadata) #1
-declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata) #1
-declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata) #1
-declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata) #1
-declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata) #1
-declare float @llvm.experimental.constrained.frem.f32(float, float, metadata, metadata) #1
-declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata) #1
-declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata) #1
-
 attributes #0 = { strictfp }
 attributes #1 = { inaccessiblememonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll
index 0897b3794f8d9..81c09c51f86d3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll
@@ -26,6 +26,3 @@ define amdgpu_kernel void @rcp_nsz(float %arg0) {
   store float %res, ptr addrspace(1) undef
   ret void
 }
-
-declare float @llvm.fma.f32(float, float, float)
-declare float @llvm.amdgcn.rcp.f32(float)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll
index ec07b0b1d4f45..754511cafb3cd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-invariant.ll
@@ -108,8 +108,4 @@ define void @test_memmove_p1_constaddr_i64(ptr addrspace(1) %dst, ptr addrspace(
   ret void
 }
 
-declare void @llvm.memcpy.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #0
-declare void @llvm.memcpy.inline.p1.p4.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(4) noalias nocapture readonly, i64, i1 immarg) #0
-declare void @llvm.memmove.p1.p4.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(4) nocapture readonly, i64, i1 immarg) #0
-
 attributes #0 = { argmemonly nofree nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll
index b83b8a0a6d7d4..70e83632ea82d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll
@@ -253,21 +253,5 @@ define void @test_memset_p3_i16(ptr addrspace(3) %dst, i8 %val) {
   ret void
 }
 
-declare void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #0
-declare void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i32, i1 immarg) #0
-declare void @llvm.memcpy.p1.p3.i16(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i16, i1 immarg) #0
-declare void @llvm.memcpy.p3.p1.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #0
-declare void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i32, i1 immarg) #0
-declare void @llvm.memcpy.p3.p1.i16(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i16, i1 immarg) #0
-declare void @llvm.memmove.p1.p3.i64(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i64, i1 immarg) #0
-declare void @llvm.memmove.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1 immarg) #0
-declare void @llvm.memmove.p1.p3.i16(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i16, i1 immarg) #0
-declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture writeonly, i8, i64, i1 immarg) #1
-declare void @llvm.memset.p1.i32(ptr addrspace(1) nocapture writeonly, i8, i32, i1 immarg) #1
-declare void @llvm.memset.p1.i16(ptr addrspace(1) nocapture writeonly, i8, i16, i1 immarg) #1
-declare void @llvm.memset.p3.i64(ptr addrspace(3) nocapture writeonly, i8, i64, i1 immarg) #1
-declare void @llvm.memset.p3.i32(ptr addrspace(3) nocapture writeonly, i8, i32, i1 immarg) #1
-declare void @llvm.memset.p3.i16(ptr addrspace(3) nocapture writeonly, i8, i16, i1 immarg) #1
-
 attributes #0 = { argmemonly nounwind willreturn }
 attributes #1 = { argmemonly nounwind willreturn writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll
index 101bb6c0ed123..52a477d483563 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-metadata.ll
@@ -17,8 +17,6 @@ define i32 @reloc_constant() {
   ret i32 %res
 }
 
-declare i32 @llvm.amdgcn.reloc.constant(metadata) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
 
 !0 = !{!"arst"}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-prefetch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-prefetch.ll
index b53610a0f22e5..0468cc3c309a2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-prefetch.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-prefetch.ll
@@ -28,5 +28,3 @@ define void @prefetch_write(ptr %ptr) {
   call void @llvm.prefetch.p0(ptr %ptr, i32 1, i32 1, i32 1)
   ret void
 }
-
-declare void @llvm.prefetch.p0(ptr, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll
index 7a8e521817a37..441d0868fe10f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll
@@ -40,6 +40,3 @@ define ptr addrspace(3) @ptrmask_local_i32(ptr addrspace(3) %ptr, i32 %mask) {
 ;   %masked = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %ptr, <2 x i64> %mask)
 ;   ret <2 x ptr> %masked
 ; }
-
-declare ptr @llvm.ptrmask.p0.i64(ptr, i64)
-declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll
index 2eab2a458e11e..0880a4700d80d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-readnone-intrinsic-callsite.ll
@@ -15,7 +15,5 @@ define amdgpu_kernel void @getreg_callsite_attributes() {
   ret void
 }
 
-declare i32 @llvm.amdgcn.s.getreg(i32) #0
-
 attributes #0 = { nounwind readonly inaccessiblememonly }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll
index 00de01a694403..c6ad8d70519e9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sat.ll
@@ -17,7 +17,6 @@ define i16 @uaddsat_i16(i16 %lhs, i16 %rhs) {
   %res = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
-declare i16 @llvm.uadd.sat.i16(i16, i16)
 
 define i32 @uaddsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: uaddsat_i32
@@ -32,7 +31,6 @@ define i32 @uaddsat_i32(i32 %lhs, i32 %rhs) {
   %res = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
-declare i32 @llvm.uadd.sat.i32(i32, i32)
 
 define i64 @uaddsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: uaddsat_i64
@@ -53,7 +51,6 @@ define i64 @uaddsat_i64(i64 %lhs, i64 %rhs) {
   %res = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
-declare i64 @llvm.uadd.sat.i64(i64, i64)
 
 define <2 x i32> @uaddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: uaddsat_v2i32
@@ -74,7 +71,6 @@ define <2 x i32> @uaddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   %res = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }
-declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>)
 
 define i16 @saddsat_i16(i16 %lhs, i16 %rhs) {
   ; CHECK-LABEL: name: saddsat_i16
@@ -92,7 +88,6 @@ define i16 @saddsat_i16(i16 %lhs, i16 %rhs) {
   %res = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
-declare i16 @llvm.sadd.sat.i16(i16, i16)
 
 define i32 @saddsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: saddsat_i32
@@ -107,7 +102,6 @@ define i32 @saddsat_i32(i32 %lhs, i32 %rhs) {
   %res = call i32 @llvm.sadd.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
-declare i32 @llvm.sadd.sat.i32(i32, i32)
 
 define i64 @saddsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: saddsat_i64
@@ -128,7 +122,6 @@ define i64 @saddsat_i64(i64 %lhs, i64 %rhs) {
   %res = call i64 @llvm.sadd.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
-declare i64 @llvm.sadd.sat.i64(i64, i64)
 
 define <2 x i32> @saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: saddsat_v2i32
@@ -149,7 +142,6 @@ define <2 x i32> @saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   %res = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }
-declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>)
 
 define i16 @usubsat_i16(i16 %lhs, i16 %rhs) {
   ; CHECK-LABEL: name: usubsat_i16
@@ -167,7 +159,6 @@ define i16 @usubsat_i16(i16 %lhs, i16 %rhs) {
   %res = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
-declare i16 @llvm.usub.sat.i16(i16, i16)
 
 define i32 @usubsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: usubsat_i32
@@ -182,7 +173,6 @@ define i32 @usubsat_i32(i32 %lhs, i32 %rhs) {
   %res = call i32 @llvm.usub.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
-declare i32 @llvm.usub.sat.i32(i32, i32)
 
 define i64 @usubsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: usubsat_i64
@@ -203,7 +193,6 @@ define i64 @usubsat_i64(i64 %lhs, i64 %rhs) {
   %res = call i64 @llvm.usub.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
-declare i64 @llvm.usub.sat.i64(i64, i64)
 
 define <2 x i32> @usubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: usubsat_v2i32
@@ -224,7 +213,6 @@ define <2 x i32> @usubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   %res = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }
-declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>)
 
 define i16 @ssubsat_i16(i16 %lhs, i16 %rhs) {
   ; CHECK-LABEL: name: ssubsat_i16
@@ -242,7 +230,6 @@ define i16 @ssubsat_i16(i16 %lhs, i16 %rhs) {
   %res = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
-declare i16 @llvm.ssub.sat.i16(i16, i16)
 
 define i32 @ssubsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: ssubsat_i32
@@ -257,7 +244,6 @@ define i32 @ssubsat_i32(i32 %lhs, i32 %rhs) {
   %res = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
-declare i32 @llvm.ssub.sat.i32(i32, i32)
 
 define i64 @ssubsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: ssubsat_i64
@@ -278,7 +264,6 @@ define i64 @ssubsat_i64(i64 %lhs, i64 %rhs) {
   %res = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
-declare i64 @llvm.ssub.sat.i64(i64, i64)
 
 define <2 x i32> @ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: ssubsat_v2i32
@@ -299,7 +284,6 @@ define <2 x i32> @ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   %res = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }
-declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>)
 
 define i16 @ushlsat_i16(i16 %lhs, i16 %rhs) {
   ; CHECK-LABEL: name: ushlsat_i16
@@ -317,7 +301,6 @@ define i16 @ushlsat_i16(i16 %lhs, i16 %rhs) {
   %res = call i16 @llvm.ushl.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
-declare i16 @llvm.ushl.sat.i16(i16, i16)
 
 define i32 @ushlsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: ushlsat_i32
@@ -332,7 +315,6 @@ define i32 @ushlsat_i32(i32 %lhs, i32 %rhs) {
   %res = call i32 @llvm.ushl.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
-declare i32 @llvm.ushl.sat.i32(i32, i32)
 
 define i64 @ushlsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: ushlsat_i64
@@ -353,7 +335,6 @@ define i64 @ushlsat_i64(i64 %lhs, i64 %rhs) {
   %res = call i64 @llvm.ushl.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
-declare i64 @llvm.ushl.sat.i64(i64, i64)
 
 define <2 x i32> @ushlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: ushlsat_v2i32
@@ -374,7 +355,6 @@ define <2 x i32> @ushlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   %res = call <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }
-declare <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32>, <2 x i32>)
 
 define i16 @sshlsat_i16(i16 %lhs, i16 %rhs) {
   ; CHECK-LABEL: name: sshlsat_i16
@@ -392,7 +372,6 @@ define i16 @sshlsat_i16(i16 %lhs, i16 %rhs) {
   %res = call i16 @llvm.sshl.sat.i16(i16 %lhs, i16 %rhs)
   ret i16 %res
 }
-declare i16 @llvm.sshl.sat.i16(i16, i16)
 
 define i32 @sshlsat_i32(i32 %lhs, i32 %rhs) {
   ; CHECK-LABEL: name: sshlsat_i32
@@ -407,7 +386,6 @@ define i32 @sshlsat_i32(i32 %lhs, i32 %rhs) {
   %res = call i32 @llvm.sshl.sat.i32(i32 %lhs, i32 %rhs)
   ret i32 %res
 }
-declare i32 @llvm.sshl.sat.i32(i32, i32)
 
 define i64 @sshlsat_i64(i64 %lhs, i64 %rhs) {
   ; CHECK-LABEL: name: sshlsat_i64
@@ -428,7 +406,6 @@ define i64 @sshlsat_i64(i64 %lhs, i64 %rhs) {
   %res = call i64 @llvm.sshl.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %res
 }
-declare i64 @llvm.sshl.sat.i64(i64, i64)
 
 define <2 x i32> @sshlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   ; CHECK-LABEL: name: sshlsat_v2i32
@@ -449,4 +426,3 @@ define <2 x i32> @sshlsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
   %res = call <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
   ret <2 x i32> %res
 }
-declare <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32>, <2 x i32>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll
index cb81871be3a58..82093160aca21 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-struct-return-intrinsics.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -global-isel -stop-after=irtranslator -o - %s | FileCheck %s
 
-declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1)
-
 define amdgpu_ps void @test_div_scale(float %arg0, float %arg1) {
   ; CHECK-LABEL: name: test_div_scale
   ; CHECK: bb.1 (%ir-block.0):
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll
index c35952c5b77ed..1c097fdfd72d9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lds-misaligned-bug.ll
@@ -128,5 +128,3 @@ bb:
   store <4 x i32> %v8, ptr addrspace(3) %gep, align 8
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll
index 36ba7c2ecfac3..d0b0d8b7acc2a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.atomic.dim.a16.ll
@@ -1891,32 +1891,4 @@ main_body:
   ret float %out
 }
 
-declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.2d.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.3d.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.cube.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.1darray.i32.i16(i32, i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darray.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.2dmsaa.i32.i16(i32, i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.2darraymsaa.i32.i16(i32, i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll
index 8e4e4cf2c5b87..7b7fb614f05c8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.dim.a16.ll
@@ -5361,52 +5361,6 @@ main_body:
   ret <4 x float> %data
 }
 
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #3
-declare float @llvm.amdgcn.image.load.1d.f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare float @llvm.amdgcn.image.load.2d.f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #2
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
index f61f985cd24ab..5ba460c6cdccf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.d16.ll
@@ -1605,13 +1605,4 @@ define amdgpu_ps <4 x half> @image_load_tfe_v4f16_dmask_0000(<8 x i32> inreg %rs
   ret <4 x half> %tex
 }
 
-declare half @llvm.amdgcn.image.load.2d.f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { half, i32 } @llvm.amdgcn.image.load.2d.sl_f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <2 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <3 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x half>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f16i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll
index adf7e6d38b989..6722ca94be45f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2d.ll
@@ -793,13 +793,4 @@ define amdgpu_ps <4 x float> @image_load_tfe_v4f32_dmask_0000(<8 x i32> inreg %r
   ret <4 x float> %tex
 }
 
-declare float @llvm.amdgcn.image.load.2d.f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { float, i32 } @llvm.amdgcn.image.load.2d.sl_f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <2 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v2f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <3 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v3f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll
index 4d36e0f797016..97adfacebce27 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.2darraymsaa.ll
@@ -125,7 +125,4 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr ad
   ret <4 x float> %v.vec
 }
 
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll
index 2c155b72c649f..6c5090a91b190 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.load.3d.ll
@@ -103,7 +103,4 @@ define amdgpu_ps float @image_load_3d_tfe_f32(<8 x i32> inreg %rsrc, i32 %s, i32
   ret float %tex
 }
 
-declare float @llvm.amdgcn.image.load.3d.f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { float, i32 } @llvm.amdgcn.image.load.3d.sl_f32i32s.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll
index 5b017ad89a0ed..b0db4feabe851 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.a16.ll
@@ -7314,62 +7314,6 @@ main_body:
   ret <2 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32, half, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll
index 241170b94318a..01525ea2af416 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.d.ll
@@ -472,8 +472,3 @@ main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.3d.v4f32.f32.f32(i32 15, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, float %s, float %t, float %r, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
-
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.3d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.3d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.3d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll
index f05b258c974d1..07acfed72a033 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.a16.ll
@@ -404,7 +404,3 @@ main_body:
   %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret <4 x float> %v
 }
-
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half,  half,  half, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half,  half,  half,  half,  half,  half, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half,  half,  half,  half,  half,  half,  half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll
index cc2a8ba9c4d5d..3c50be79304c4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.sample.g16.ll
@@ -2420,28 +2420,6 @@ main_body:
   ret <2 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
index 12234088adca6..91da9ce5e5a0b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-llvm.amdgcn.image.store.2d.d16.ll
@@ -480,9 +480,4 @@ define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
   ret void
 }
 
-declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir
index 4328d47969a1e..38a0fa32f83ae 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx7.mir
@@ -27,9 +27,6 @@
     %fma = call <5 x half> @llvm.maxnum.v5f16(<5 x half> %a, <5 x half> %b)
     ret <5 x half> %fma
   }
-
-  declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
-  declare <5 x half> @llvm.maxnum.v5f16(<5 x half>, <5 x half>)
 ...
 
 ---
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir
index 61af5e01ed4c6..260a037b61c57 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-vector-args-gfx8-plus.mir
@@ -28,9 +28,6 @@
     %fma = call <5 x half> @llvm.maxnum.v5f16(<5 x half> %a, <5 x half> %b)
     ret <5 x half> %fma
   }
-
-  declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
-  declare <5 x half> @llvm.maxnum.v5f16(<5 x half>, <5 x half>)
 ...
 
 ---
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
index 3ef059057ac8e..b5c4819bbedc1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
@@ -3,15 +3,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -o - < %s | FileCheck %s --check-prefixes=GFX,GFX8
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -o - < %s | FileCheck %s --check-prefixes=GFX,GFX10
 
-declare i16 @llvm.abs.i16(i16, i1)
-declare i32 @llvm.abs.i32(i32, i1)
-declare i64 @llvm.abs.i64(i64, i1)
-declare <2 x i8> @llvm.abs.v2i8(<2 x i8>, i1)
-declare <3 x i8> @llvm.abs.v3i8(<3 x i8>, i1)
-declare <2 x i16> @llvm.abs.v2i16(<2 x i16>, i1)
-declare <3 x i16> @llvm.abs.v3i16(<3 x i16>, i1)
-declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
-
 define amdgpu_cs i16 @abs_sgpr_i16(i16 inreg %arg) {
 ; GFX-LABEL: abs_sgpr_i16:
 ; GFX:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
index 46036681e905e..1831748335e4d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -global-isel -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32,-wavefrontsize64 -global-isel -verify-machineinstrs < %s | FileCheck %s
 
-declare i32 @llvm.amdgcn.ballot.i32(i1)
-declare i32 @llvm.ctpop.i32(i32)
-
 ; Test ballot(0)
 
 define amdgpu_cs i32 @constant_false() {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
index a18f843440445..722ecac483f46 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
@@ -1,9 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel -verify-machineinstrs < %s | FileCheck %s
 
-declare i64 @llvm.amdgcn.ballot.i64(i1)
-declare i64 @llvm.ctpop.i64(i64)
-
 ; Test ballot(0)
 
 define amdgpu_cs i64 @constant_false() {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll
index d165fb577efc2..47e6813648335 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.dispatch.ptr.ll
@@ -12,8 +12,6 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out) {
   ret void
 }
 
-declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-
 attributes #0 = { readnone }
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
index 4caf83774bbba..ffd4567ba80bf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
@@ -1572,8 +1572,4 @@ exit:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) #0
-declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
index 1a49a38158122..e2e7a7391326b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
@@ -1685,10 +1685,5 @@ define amdgpu_kernel void @test_div_scale_f64_val_undef_val(ptr addrspace(1) %ou
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) #1
-declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) #1
-declare float @llvm.fabs.f32(float) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll
index 03ed6837291c8..116917580eeed 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fadd.ll
@@ -274,6 +274,4 @@ define float @ds_fadd_f32_vv_volatile(ptr addrspace(3) %ptr, float %val) {
   ret float %ret
 }
 
-declare float @llvm.amdgcn.ds.fadd(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0
-
 attributes #0 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll
index e4c4f42b137ef..b269a27a3a63f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll
@@ -366,6 +366,4 @@ define float @ds_fmax_f32_vv_volatile(ptr addrspace(3) %ptr, float %val) {
   ret float %ret
 }
 
-declare float @llvm.amdgcn.ds.fmax(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0
-
 attributes #0 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll
index 0f6fb5acd56ad..ebd4f08c5123c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmin.ll
@@ -274,6 +274,4 @@ define float @ds_fmin_f32_vv_volatile(ptr addrspace(3) %ptr, float %val) {
   ret float %ret
 }
 
-declare float @llvm.amdgcn.ds.fmin(ptr addrspace(3) nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0
-
 attributes #0 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
index d7b7f03d428bf..f10421a77e572 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i32.ll
@@ -56,5 +56,3 @@ bb:
   store volatile i32 0, ptr addrspace(1) undef
   ret void
 }
-
-declare void @llvm.amdgcn.end.cf.i32(i32 %val)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll
index 81d8472ebd46e..56f29205b709d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.end.cf.i64.ll
@@ -33,5 +33,3 @@ bb:
   store volatile i32 0, ptr addrspace(1) undef
   ret void
 }
-
-declare void @llvm.amdgcn.end.cf.i64(i64 %val)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
index 1d9514c58ab9c..553f8f3f90357 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fdot2.ll
@@ -106,6 +106,4 @@ define float @v_fdot2_inline_literal_c(<2 x half> %a, <2 x half> %b) {
   ret float %ret
 }
 
-declare float @llvm.amdgcn.fdot2(<2 x half>, <2 x half>, float, i1 immarg) #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
index 36455f190510e..124ffb527b93b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.fmul.legacy.ll
@@ -586,9 +586,6 @@ define float @v_mul_legacy_f32_2.0_swap(float %b) {
   ret float %result
 }
 
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.amdgcn.fmul.legacy(float, float) #1
-
 attributes #0 = { nounwind readnone speculatable willreturn }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { "denormal-fp-math-f32"="preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll
index ade6e55b482bb..dddb823e94801 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.csub.ll
@@ -209,7 +209,5 @@ define amdgpu_kernel void @global_atomic_csub_sgpr_base_offset_nortn(ptr addrspa
   ret void
 }
 
-declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #1
-
 attributes #0 = { nounwind willreturn }
 attributes #1 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll
index 9a66fe10ccdf3..3ec94c801c2da 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd-with-ret.ll
@@ -1,9 +1,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s
 ; RUN: not --crash llc -global-isel < %s -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908
 
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float)
-declare <2 x half> @llvm.amdgcn.global.atomic.fadd.f32.p1.v2f16(ptr addrspace(1) nocapture, <2 x half>)
-
 ; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.global.atomic.fadd)
 
 ; GFX90A-LABEL: {{^}}global_atomic_fadd_f32_rtn:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll
index 752ddbb896c6b..37bd7cb4ceafe 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.global.atomic.fadd.ll
@@ -120,7 +120,4 @@ define void @global_atomic_fadd_v2f16_off_neg2047(ptr addrspace(1) %ptr, <2 x ha
   ret void
 }
 
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) #0
-declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) nocapture, <2 x half>) #0
-
 attributes #0 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll
index 1e0cbde7df0db..ace0be55009bd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i32.ll
@@ -42,5 +42,3 @@ entry:
   store volatile i32 %break, ptr addrspace(1) undef
   ret void
 }
-
-declare i32 @llvm.amdgcn.if.break.i32(i1, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll
index 9718cef5c6db0..25c238a11a073 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.if.break.i64.ll
@@ -23,5 +23,3 @@ entry:
   store volatile i64 %break, ptr addrspace(1) undef
   ret void
 }
-
-declare i64 @llvm.amdgcn.if.break.i64(i1, i64)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.a16.ll
index a3796197e4c53..837b4bcc7557a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.a16.ll
@@ -2116,46 +2116,4 @@ main_body:
   ret <2 x float> %out
 }
 
-declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i16(i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i16(i32, i32, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i16(i32, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i16(i32, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-
-declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i16(i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i16(i64, i64, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i16(i64, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i16(i64, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i16(i64, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
index 221e2fd4f00f7..c070c0af93502 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.ll
@@ -3986,46 +3986,4 @@ main_body:
   ret <2 x float> %out
 }
 
-declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.sub.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.smin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.umin.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.smax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.umax.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.and.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.or.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.xor.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.inc.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.dec.1d.i64.i32(i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2d.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.3d.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.cube.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.1darray.i64.i32(i64, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2darray.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2dmsaa.i64.i32(i64, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare i64 @llvm.amdgcn.image.atomic.add.2darraymsaa.i64.i32(i64, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll
index 916b9c0835d41..0158f10f6d0d1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.a16.dim.ll
@@ -1012,19 +1012,4 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32 immarg, half, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32 immarg, half, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32 immarg, half, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.dim.ll
index 841f4f1ac055e..137ebfd618dbc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.dim.ll
@@ -1214,20 +1214,4 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.o.dim.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.o.dim.ll
index 7c04fe3159184..9bf759428a5ca 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.o.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.o.dim.ll
@@ -539,17 +539,4 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.a16.ll
index d1a36d552e21b..3570029dbcd7d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.a16.ll
@@ -397,14 +397,5 @@ main_body:
   ret <4 x float> %r
 }
 
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.ll
index 496f9f428580b..4ec74f8376781 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.getresinfo.ll
@@ -697,17 +697,5 @@ main_body:
   ret <4 x float> %r
 }
 
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <3 x float> @llvm.amdgcn.image.getresinfo.1d.v3f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <2 x float> @llvm.amdgcn.image.getresinfo.1d.v2f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare float @llvm.amdgcn.image.getresinfo.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
index 19b0057d69b69..64b2c05e070ca 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.d16.ll
@@ -1143,14 +1143,4 @@ define amdgpu_ps float @load_1d_v4f16_tfe_dmask_xyzw(<8 x i32> inreg %rsrc, i32
   ret float %vv
 }
 
-declare half @llvm.amdgcn.image.load.1d.half.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.image.load.1d.v2f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.image.load.1d.v3f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.ll
index ecf81f633761d..4524e1b104a19 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.1d.ll
@@ -983,14 +983,4 @@ define amdgpu_ps float @load_1d_f32_tfe_dmask_0(<8 x i32> inreg %rsrc, i32 %s) {
   ret float %vv
 }
 
-declare float @llvm.amdgcn.image.load.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.image.load.1d.v3f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
-declare { float, i32 } @llvm.amdgcn.image.load.1d.sl_f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <2 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <3 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f32i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll
index fb4c92353cb99..bb5844c11e670 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2d.ll
@@ -268,7 +268,4 @@ define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc,
   ret <4 x float> %v.vec
 }
 
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll
index 2c8b8126aa09a..fa0536466fa7b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.a16.ll
@@ -310,7 +310,4 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inre
   ret <4 x float> %v.vec
 }
 
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll
index 11ad98ac5fd1a..032f0f9ccaf8f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.2darraymsaa.ll
@@ -280,7 +280,4 @@ define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inre
   ret <4 x float> %v.vec
 }
 
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll
index f5d11fcdff80a..a0bcf57b838ab 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.a16.ll
@@ -280,7 +280,4 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc,
   ret <4 x float> %v.vec
 }
 
-declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll
index 162a58613065f..8a6cf4dccb980 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.load.3d.ll
@@ -272,7 +272,4 @@ define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc,
   ret <4 x float> %v.vec
 }
 
-declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.cd.g16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.cd.g16.ll
index a39d7ae1fe4f4..a65c0d293b3d6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.cd.g16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.cd.g16.ll
@@ -107,15 +107,6 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll
index a874b30aff275..4e111878ed739 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.g16.ll
@@ -352,19 +352,6 @@ main_body:
   ret <2 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll
index d07eadcd64146..fdc99bda11f08 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll
@@ -141,9 +141,4 @@ define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t,
   ret void
 }
 
-declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.ll
index 9e445d034edc9..16d91305759a2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.ll
@@ -816,9 +816,4 @@ define amdgpu_ps void @image_store_f32_dmask_1111(<8 x i32> inreg %rsrc, i32 inr
   ret void
 }
 
-declare void @llvm.amdgcn.image.store.2d.f32.i32(float, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v2f32.i32(<2 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v3f32.i32(<3 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.ptr.buffer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.ptr.buffer.ll
index 7d693d8a1bb29..0eafa6b1d7912 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.ptr.buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.implicit.ptr.buffer.ll
@@ -10,7 +10,5 @@ define amdgpu_ps i32 @test_ps() #1 {
   ret i32 %value
 }
 
-declare ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() #0
-
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll
index 623360f6b1d9c..c92576e144c9b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.inreg.ll
@@ -167,13 +167,5 @@ main_body:
   ret half %res
 }
 
-declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #0
-declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) #0
-declare float @llvm.amdgcn.interp.inreg.p10.f16(float, float, float, i1) #0
-declare half @llvm.amdgcn.interp.inreg.p2.f16(float, float, float, i1) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare void @llvm.amdgcn.exp.f16(i32, i32, float, float, float, float, i1, i1) #0
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.p1.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.p1.f16.ll
index 92a0dd5748742..debf0c22e8d5b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.p1.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.interp.p1.f16.ll
@@ -111,6 +111,4 @@ define amdgpu_ps float @interp_f16_sgpr_i(float inreg %i,i32 inreg %m0) #0 {
   ret float %res
 }
 
-declare float @llvm.amdgcn.interp.p1.f16(float, i32 immarg, i32 immarg, i1 immarg, i32) #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
index 47e476de74cf4..68b5d0f37bd1e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
@@ -9,12 +9,6 @@
 ; uint4 llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(ulong node_ptr, float ray_extent, float3 ray_origin, float3 ray_dir, float3 ray_inv_dir, uint4 texture_descr)
 ; uint4 llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(ulong node_ptr, float ray_extent, float3 ray_origin, half3 ray_dir, half3 ray_inv_dir, uint4 texture_descr)
 
-declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32, float, <3 x float>, <3 x float>, <3 x float>, <4 x i32>)
-declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32, float, <3 x float>, <3 x half>, <3 x half>, <4 x i32>)
-declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64, float, <3 x float>, <3 x float>, <3 x float>, <4 x i32>)
-declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64, float, <3 x float>, <3 x half>, <3 x half>, <4 x i32>)
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> inreg %tdescr) {
 ; GCN-LABEL: image_bvh_intersect_ray:
 ; GCN:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
index 303dc46e2c884..071ca12f9f17d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll
@@ -145,9 +145,6 @@ bb1:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i1 @llvm.amdgcn.is.private(ptr nocapture) #0
-
 attributes #0 = { nounwind readnone speculatable }
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
index 63702d2587574..9700c33677d40 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll
@@ -145,9 +145,6 @@ bb1:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0
-
 attributes #0 = { nounwind readnone speculatable }
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
index 7fc9842824b01..4188c7bccc97d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -119,9 +119,6 @@ define ptr addrspace(4) @func_kernarg_segment_ptr() {
   ret ptr addrspace(4) %ptr
 }
 
-declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind "amdgpu-implicitarg-num-bytes"="0" }
 attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.make.buffer.rsrc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.make.buffer.rsrc.ll
index 4a151aeca87e4..2126c249e7d23 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.make.buffer.rsrc.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.make.buffer.rsrc.ll
@@ -225,8 +225,3 @@ define amdgpu_ps float @general_case_load_with_waterfall(ptr %p, i16 %stride, i3
   %value = call float @llvm.amdgcn.struct.ptr.buffer.load(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
   ret float %value
 }
-
-declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr nocapture readnone, i16, i32, i32)
-declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) nocapture readnone, i16, i32, i32)
-declare float @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8) nocapture readonly, i32, i32, i32 immarg)
-declare float @llvm.amdgcn.struct.ptr.buffer.load(ptr addrspace(8) nocapture readonly, i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
index e7faabb72ab69..299fcd1d48bb8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll
@@ -1,15 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN %s
 
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16>, <4 x i16>, <32 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16>, <4 x i16>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x4bf16.1k(<4 x i16>, <4 x i16>, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x8bf16.1k(<4 x i16>, <4 x i16>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x16bf16.1k(<4 x i16>, <4 x i16>, <4 x float>, i32, i32, i32)
-declare <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double, double, <4 x double>, i32, i32, i32)
-declare double @llvm.amdgcn.mfma.f64.4x4x4f64(double, double, double, i32, i32, i32)
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #0 {
 ; GCN-LABEL: test_mfma_f32_32x32x4bf16_1k:
 ; GCN:       ; %bb.0: ; %bb
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll
index c0cd068607200..1184c3ea2afaa 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mov.dpp.ll
@@ -90,7 +90,4 @@ define amdgpu_kernel void @mov_dpp64_test(ptr addrspace(1) %out, i64 %in1) {
   ret void
 }
 
-declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #0
-declare i64 @llvm.amdgcn.mov.dpp.i64(i64, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #0
-
 attributes #0 = { convergent nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll
index 1eb0c2a877425..7b5531652efa8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll
@@ -12,8 +12,6 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out) {
   ret void
 }
 
-declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
-
 attributes #0 = { nounwind readnone }
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
index 570a39d0fa5fb..61108f25d0637 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.add.ll
@@ -459,7 +459,4 @@ define amdgpu_ps float @raw_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_vof
   ret float %cast
 }
 
-declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32 immarg) #0
-declare i64 @llvm.amdgcn.raw.buffer.atomic.add.i64(i64, <4 x i32>, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
index c2799e5836a97..d21391164720f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.cmpswap.ll
@@ -796,6 +796,3 @@ define amdgpu_ps double @raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr
   %cast = bitcast i64 %ret to double
   ret double %cast
 }
-
-declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32 immarg)
-declare i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64, i64, <4 x i32>, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll
index d59c5a6a2609c..156911bae018b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll
@@ -1,9 +1,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s
 ; RUN: not --crash llc -global-isel < %s -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908
 
-declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
-
 ; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr(<4 x s32>), %{{[0-9]+}}:vgpr(s32), %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8) (in function: buffer_atomic_add_f32_rtn)
 
 ; GFX90A-LABEL: {{^}}buffer_atomic_add_f32_rtn:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
index c96fc017ae936..d28ee0f00caa1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd.ll
@@ -487,7 +487,4 @@ define amdgpu_ps void @raw_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0
   ret void
 }
 
-declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
index 36d5e914d40be..dd74e1ce822e6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.f16.ll
@@ -430,9 +430,4 @@ define amdgpu_ps <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffs
   ret <4 x half> %val
 }
 
-declare half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32>, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.buffer.load.format.v2f16(<4 x i32>, i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.buffer.load.format.v4f16(<4 x i32>, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
index 23efaa4d2bd91..70c8c94722172 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.format.ll
@@ -325,9 +325,4 @@ define amdgpu_ps <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voff
   ret <4 x float> %val
 }
 
-declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
index 102a9bd840b09..0b287ddea6631 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.load.ll
@@ -1677,16 +1677,3 @@ define amdgpu_ps float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffs
   %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret float %val
 }
-
-declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 immarg)
-declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32 immarg)
-declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32 immarg)
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32 immarg)
-
-declare half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32>, i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32>, i32, i32, i32 immarg)
-declare <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32>, i32, i32, i32 immarg)
-declare <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32>, i32, i32, i32 immarg)
-
-declare i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32>, i32, i32, i32 immarg)
-declare i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32>, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
index 6541085b72e54..a4450c7486ed6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f16.ll
@@ -806,7 +806,3 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
   call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.buffer.store.format.f16(half, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
index 1f89150f09ced..03857bb2a5be8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.format.f32.ll
@@ -602,8 +602,3 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
   call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
index 030f8dae0ef79..8f0da2e43244c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.store.ll
@@ -1549,15 +1549,3 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr
   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 5000, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.buffer.store.i8(i8, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.i16(i16, <4 x i32>, i32, i32, i32 immarg)
-
-declare void @llvm.amdgcn.raw.buffer.store.f16(half, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg)
-
-declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.add.ll
index 93d68443c7843..a356ed19380b9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.add.ll
@@ -251,7 +251,4 @@ define amdgpu_ps float @raw_ptr_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr
   ret float %cast
 }
 
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32 immarg) #0
-declare i64 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i64(i64, ptr addrspace(8), i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll
index 56b2d0452dd45..74ac365ff4d46 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.ll
@@ -425,6 +425,3 @@ define amdgpu_ps double @raw_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__
   %cast = bitcast i64 %ret to double
   ret double %cast
 }
-
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32 immarg)
-declare i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd-with-ret.ll
index 798a3ee1d75fd..e4df568815b5a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd-with-ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd-with-ret.ll
@@ -1,9 +1,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s
 ; RUN: not --crash llc -global-isel < %s -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908
 
-declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
-
 ; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr(<4 x s32>), %{{[0-9]+}}:vgpr(s32), %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr, 0, 0, 0 :: (volatile dereferenceable load store (s32) on %ir.rsrc.load, align 1, addrspace 8) (in function: buffer_atomic_add_f32_rtn)
 
 ; GFX90A-LABEL: {{^}}buffer_atomic_add_f32_rtn:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
index 999f42ff905ab..b301548aa04e8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
@@ -487,7 +487,4 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsr
   ret void
 }
 
-declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll
index 5b19b1c913a94..33123b0efa0f9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.f16.ll
@@ -312,9 +312,4 @@ define amdgpu_ps <4 x half> @raw_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgpr_v
   ret <4 x half> %val
 }
 
-declare half @llvm.amdgcn.raw.ptr.buffer.load.format.f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.ll
index 2dc688db86e4f..9e572b0572aa4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.format.ll
@@ -176,9 +176,4 @@ define amdgpu_ps <4 x float> @raw_ptr_buffer_load_format_v4f32__sgpr_rsrc__vgpr_
   ret <4 x float> %val
 }
 
-declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll
index 7b8b028128dd3..df771c1a2a46b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.load.ll
@@ -951,16 +951,3 @@ define amdgpu_ps float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_s
   %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret float %val
 }
-
-declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg)
-declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32 immarg)
-declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32 immarg)
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32 immarg)
-
-declare half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32 immarg)
-declare <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.v3f16(ptr addrspace(8), i32, i32, i32 immarg)
-declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32 immarg)
-
-declare i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32 immarg)
-declare i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f16.ll
index 3ed6bbdd36156..647d0d4e59e1b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f16.ll
@@ -571,7 +571,3 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_vo
   call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f16(<4 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.f16(half, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f32.ll
index dee83a9b0a6ec..e29c94c6b463c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.format.f32.ll
@@ -331,8 +331,3 @@ define amdgpu_ps void @raw_ptr_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_vo
   call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v3f32(<3 x float>, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.ll
index 2c99ce8694bcc..dd810f311a9b7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.buffer.store.ll
@@ -874,15 +874,3 @@ define amdgpu_ps void @raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__
   call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 5000, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16, ptr addrspace(8), i32, i32, i32 immarg)
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f16(half, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32 immarg)
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v3f32(<3 x float>, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.f16.ll
index a799e203d6439..579ba1e681a5e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.f16.ll
@@ -387,9 +387,4 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
   ret half %val
 }
 
-declare half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.ll
index 3e135472ebbb1..f2a25fb9fec9c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.load.ll
@@ -224,9 +224,4 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
   ret float %val
 }
 
-declare float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.f16.ll
index 725faa1b4a49f..d068f035f8b13 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.f16.ll
@@ -569,8 +569,3 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
   call void @llvm.amdgcn.raw.ptr.tbuffer.store.f16(half %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.f16(half, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f16(<3 x half>, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.i8.ll
index a12a6005df24e..0161b0ade79dd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.i8.ll
@@ -346,5 +346,3 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs
   call void @llvm.amdgcn.raw.ptr.tbuffer.store.i8(i8 %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.i8(i8, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.ll
index 9db5c160a6236..6eadd67fede86 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.ptr.tbuffer.store.ll
@@ -690,8 +690,3 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
   call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f32(<3 x float>, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll
index 1cfb15391be36..cb120166796dc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.f16.ll
@@ -547,9 +547,4 @@ define amdgpu_ps half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffs
   ret half %val
 }
 
-declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
index 4f8b20d10c874..6cc6ad420fc52 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.load.ll
@@ -411,9 +411,4 @@ define amdgpu_ps float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
   ret float %val
 }
 
-declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll
index b9d0cb52d2405..09fe0ba0c2f5d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.f16.ll
@@ -825,8 +825,3 @@ define amdgpu_ps void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soff
   call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
index c1fb4aacafe1d..536f0e5afd8c5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.i8.ll
@@ -510,5 +510,3 @@ define amdgpu_ps void @raw_tbuffer_store_i8__vgpr_rsrc__sgpr_voffset__vgpr_soffs
   call void @llvm.amdgcn.raw.tbuffer.store.i8(i8 %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.tbuffer.store.i8(i8, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
index 09227af922a6e..4817779ae12b9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.tbuffer.store.ll
@@ -1248,8 +1248,3 @@ define amdgpu_ps void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soff
   call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.tbuffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
index 3def36766fbe0..916c4ba8d9192 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
@@ -281,11 +281,6 @@ define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
   ret double %rsq_clamp
 }
 
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.amdgcn.rsq.clamp.f32(float) #1
-declare double @llvm.fabs.f64(double) #1
-declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "amdgpu-ieee"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
index 1a2a50f444ddb..739963b243bf9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.buffer.load.ll
@@ -6845,30 +6845,3 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr
   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
   ret float %val
 }
-
-declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
-declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg)
-declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)
-declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg)
-declare <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32>, i32, i32 immarg)
-declare <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32>, i32, i32 immarg)
-
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32      immarg)
-declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32 immarg)
-declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32 immarg)
-declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32 immarg)
-declare <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32>, i32, i32 immarg)
-declare <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32>, i32, i32 immarg)
-
-declare i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32>, i32, i32 immarg)
-declare i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32>, i32, i32 immarg)
-declare i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32>, i32, i32 immarg)
-
-declare <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32>, i32, i32 immarg)
-declare <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32>, i32, i32 immarg)
-
-declare <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32>, i32, i32 immarg)
-declare <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32>, i32, i32 immarg)
-
-declare <4 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v4p1(<4 x i32>, i32, i32 immarg)
-declare <8 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v8p1(<4 x i32>, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
index af50f56a87226..6b150a00687e0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.setreg.ll
@@ -1476,6 +1476,4 @@ define void @test_setreg_roundingmode_var_vgpr(i32 %var.mode) {
   ret void
 }
 
-declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll
index a370408a9efcb..4b20a46c35d86 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.s.sleep.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare void @llvm.amdgcn.s.sleep(i32) #0
-
 ; GCN-LABEL: {{^}}test_s_sleep:
 ; GCN: s_sleep 0{{$}}
 ; GCN: s_sleep 1{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
index 6eed92ba1d71c..3be74f85e7c37 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
@@ -948,8 +948,5 @@ define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(ptr addrspace(1) %out, pt
   ret void
 }
 
-declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) #1
-declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
index e2dab03e410aa..a059fcc53d6f1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
@@ -354,6 +354,4 @@ define i32 @v_sdot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
   ret i32 %r
 }
 
-declare i32 @llvm.amdgcn.sdot2(<2 x i16>, <2 x i16>, i32, i1 immarg) #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
index 06560afee3c9a..6db2e3b1713be 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot4.ll
@@ -129,6 +129,4 @@ define i32 @v_sdot4_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
   ret i32 %r
 }
 
-declare i32 @llvm.amdgcn.sdot4(i32, i32, i32, i1 immarg) #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll
index 0d729351f65a7..acc86aa6fc5cc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot8.ll
@@ -83,6 +83,4 @@ define i32 @v_sdot8_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
   ret i32 %r
 }
 
-declare i32 @llvm.amdgcn.sdot8(i32, i32, i32, i1 immarg) #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
index cbee039df7fd0..a6725cb2811f0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
@@ -93,8 +93,4 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
   ret void
 }
 
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
-declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64) #0
-declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
-
 attributes #0 = { convergent readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll
index 200d38a0d5699..2773c8e7ec99b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.softwqm.ll
@@ -77,10 +77,4 @@ define amdgpu_ps <3 x float> @softwqm_v3f32(<3 x float> %val) {
   ret <3 x float> %ret
 }
 
-declare i1 @llvm.amdgcn.softwqm.i1(i1) #0
-declare float @llvm.amdgcn.softwqm.f32(float) #0
-declare <2 x half> @llvm.amdgcn.softwqm.v2f16(<2 x half>) #0
-declare <3 x float> @llvm.amdgcn.softwqm.v3f32(<3 x float>) #0
-declare double @llvm.amdgcn.softwqm.f64(double) #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
index ab720ce8f942c..1b5b3201b844b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.add.ll
@@ -453,7 +453,4 @@ define amdgpu_ps float @struct_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__vgpr_
   ret float %cast
 }
 
-declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32 immarg) #0
-declare i64 @llvm.amdgcn.struct.buffer.atomic.add.i64(i64, <4 x i32>, i32, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll
index f9f70ecadfe60..586d9626dcb6b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.cmpswap.ll
@@ -843,6 +843,3 @@ define amdgpu_ps double @struct_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__s
   %cast = bitcast i64 %ret to double
   ret double %cast
 }
-
-declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32 immarg)
-declare i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64, i64, <4 x i32>, i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll
index 89daf3ae88cbc..2a975d576c6bb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll
@@ -3,9 +3,6 @@
 
 ; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr(<4 x s32>), %{{[0-9]+}}:vgpr(s32), %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr, 0, 0, -1 :: (volatile dereferenceable load store (s32), align 1, addrspace 8) (in function: buffer_atomic_add_f32_rtn)
 
-declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
-
 
 ; GFX90A-LABEL: {{^}}buffer_atomic_add_f32_rtn:
 ; GFX90A: buffer_atomic_add_f32 v{{[0-9]+}}, v[{{[0-9:]+}}], s[{{[0-9:]+}}], s{{[0-9]+}} idxen offen glc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
index 8589fe9fd056d..ad0422639e886 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd.ll
@@ -518,7 +518,4 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc
   ret void
 }
 
-declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
index 870588014cd29..2e583079dc1e0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.f16.ll
@@ -518,10 +518,4 @@ define amdgpu_ps half @struct_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex__vg
   ret half %fval
 }
 
-declare half @llvm.amdgcn.struct.buffer.load.format.f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.buffer.load.format.v2f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.struct.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.struct.buffer.load.format.v4f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare i16 @llvm.amdgcn.struct.buffer.load.format.i16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
index 686b849ff58f9..a7b4deb55f63f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.format.ll
@@ -580,13 +580,4 @@ define amdgpu_cs void @struct_buffer_load_format_i32_tfe(<4 x i32> inreg %rsrc,
   ret void
 }
 
-declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
index 94ce8aac8a4c6..57e31f9a795a3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.load.ll
@@ -834,15 +834,4 @@ define amdgpu_ps float @struct_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vof
   ret float %val
 }
 
-declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.struct.buffer.load.v3f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32>, i32, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
index f62a15d470afd..9624f896dad20 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f16.ll
@@ -412,9 +412,3 @@ define amdgpu_ps void @struct_buffer_store_format_i16__vgpr_val__sgpr_rsrc__vgpr
   call void @llvm.amdgcn.struct.buffer.store.format.i16(i16 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.struct.buffer.store.format.f16(half, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.format.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.format.i16(i16, <4 x i32>, i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
index 8a395f0e73222..99c00189074e6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.format.f32.ll
@@ -323,9 +323,3 @@ define amdgpu_ps void @struct_buffer_store_format_i32__vgpr_val__sgpr_rsrc__vgpr
   call void @llvm.amdgcn.struct.buffer.store.format.i32(i32 %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.struct.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.format.v3f32(<3 x float>, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.format.i32(i32, <4 x i32>, i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
index b89ed46ba0550..5afc5bca97d48 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.store.ll
@@ -496,16 +496,3 @@ define amdgpu_ps void @struct_buffer_store_v4f16_sgpr_rsrc__vgpr_val__vgpr_vinde
   call void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.struct.buffer.store.i8(i8, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.i16(i16, <4 x i32>, i32, i32, i32, i32 immarg)
-
-declare void @llvm.amdgcn.struct.buffer.store.f16(half, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
-
-declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.add.ll
index 2e7323068d108..b4afa5d20c7c0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.add.ll
@@ -245,7 +245,4 @@ define amdgpu_ps float @struct_ptr_buffer_atomic_add_i32__vgpr_val__sgpr_rsrc__v
   ret float %cast
 }
 
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare i64 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i64(i64, ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll
index e8e6cab4edbe8..04593c380a4bd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.ll
@@ -449,6 +449,3 @@ define amdgpu_ps double @struct_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cm
   %cast = bitcast i64 %ret to double
   ret double %cast
 }
-
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd-with-ret.ll
index 5b0b91f983fe6..70aba81b7fb0b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd-with-ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd-with-ret.ll
@@ -3,9 +3,6 @@
 
 ; GFX908: LLVM ERROR: cannot select: %{{[0-9]+}}:vgpr_32(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr(<4 x s32>), %{{[0-9]+}}:vgpr(s32), %{{[0-9]+}}:vgpr, %{{[0-9]+}}:sgpr, 0, 0, -1 :: (volatile dereferenceable load store (s32) on %ir.rsrc.load, align 1, addrspace 8) (in function: buffer_atomic_add_f32_rtn)
 
-declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg)
-
 
 ; GFX90A-LABEL: {{^}}buffer_atomic_add_f32_rtn:
 ; GFX90A: buffer_atomic_add_f32 v{{[0-9]+}}, v[{{[0-9:]+}}], s[{{[0-9:]+}}], s{{[0-9]+}} idxen offen glc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
index 54657982493f7..d0689a027355a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
@@ -518,7 +518,4 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_
   ret void
 }
 
-declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll
index 6c0319ef570d6..7aed92708bd50 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.f16.ll
@@ -372,10 +372,4 @@ define amdgpu_ps half @struct_ptr_buffer_load_format_i16__sgpr_rsrc__vgpr_vindex
   ret half %fval
 }
 
-declare half @llvm.amdgcn.struct.ptr.buffer.load.format.f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare i16 @llvm.amdgcn.struct.ptr.buffer.load.format.i16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.ll
index 9edc245549113..52a945698b4ee 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.format.ll
@@ -322,13 +322,4 @@ define amdgpu_cs void @struct_ptr_buffer_load_format_i32_tfe(ptr addrspace(8) in
   ret void
 }
 
-declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll
index 66c62e9ce8a9c..a23673aa80ec1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.load.ll
@@ -419,15 +419,4 @@ define amdgpu_ps float @struct_ptr_buffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr
   ret float %val
 }
 
-declare i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f16.ll
index 25fe7d2877ce3..18e758660ea85 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f16.ll
@@ -286,9 +286,3 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_i16__vgpr_val__sgpr_rsrc__
   call void @llvm.amdgcn.struct.ptr.buffer.store.format.i16(i16 %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.f16(half, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v3f16(<3 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.i16(i16, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f32.ll
index 3a4c258537814..d54cb5ab9d887 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.format.f32.ll
@@ -173,9 +173,3 @@ define amdgpu_ps void @struct_ptr_buffer_store_format_i32__vgpr_val__sgpr_rsrc__
   call void @llvm.amdgcn.struct.ptr.buffer.store.format.i32(i32 %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v3f32(<3 x float>, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.i32(i32, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.ll
index 2e0a12b9d969c..183c8839d652c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.buffer.store.ll
@@ -272,16 +272,3 @@ define amdgpu_ps void @struct_ptr_buffer_store_v4f16_sgpr_rsrc__vgpr_val__vgpr_v
   call void @llvm.amdgcn.struct.ptr.buffer.store.v4f16(<4 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.struct.ptr.buffer.store.i8(i8, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.i16(i16, ptr addrspace(8), i32, i32, i32, i32 immarg)
-
-declare void @llvm.amdgcn.struct.ptr.buffer.store.f16(half, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v3f16(<3 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg)
-
-declare void @llvm.amdgcn.struct.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v3f32(<3 x float>, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.f16.ll
index 1a57c2e77bddf..41f283132eea3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.f16.ll
@@ -374,9 +374,4 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
   ret half %val
 }
 
-declare half @llvm.amdgcn.struct.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.ll
index 63143ed718054..77abc654173e3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.ptr.tbuffer.load.ll
@@ -211,9 +211,4 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo
   ret float %val
 }
 
-declare float @llvm.amdgcn.struct.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
index f270f87aae66d..06e05a762964e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.f16.ll
@@ -521,9 +521,4 @@ define amdgpu_ps half @struct_tbuffer_load_f16__sgpr_rsrc__vgpr_vindex__vgpr_vof
   ret half %val
 }
 
-declare half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
index 7d3ecd363befb..53fd36c07f2c6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.tbuffer.load.ll
@@ -391,9 +391,4 @@ define amdgpu_ps float @struct_tbuffer_load_f32__sgpr_rsrc__vgpr_vindex__vgpr_vo
   ret float %val
 }
 
-declare float @llvm.amdgcn.struct.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.struct.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.struct.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot4.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot4.ll
index 65ecaa14880ac..58a5d8e5805c8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot4.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
 
-declare i32 @llvm.amdgcn.sudot4(i1 %asign, i32 %a, i1 %bsign, i32 %b, i32 %c, i1 %clamp)
-
 define i32 @test_llvm_amdgcn_sudot4_uu(i32 %a, i32 %b, i32 %c) {
 ; GFX11-LABEL: test_llvm_amdgcn_sudot4_uu:
 ; GFX11:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot8.ll
index 92bad5ebd5ed5..e4cb96200d95e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot8.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sudot8.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
 
-declare i32 @llvm.amdgcn.sudot8(i1 %asign, i32 %a, i1 %bsign, i32 %b, i32 %c, i1 %clamp)
-
 define i32 @test_llvm_amdgcn_sudot8_uu(i32 %a, i32 %b, i32 %c) {
 ; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu:
 ; GFX11:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll
index 1d5cc1e1ec046..aad0d18ef6da6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll
@@ -171,6 +171,4 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
   ret void
 }
 
-declare double @llvm.amdgcn.trig.preop.f64(double, i32) #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
index 43a0f018dc1cd..53bdb8fde0043 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
@@ -1034,8 +1034,5 @@ define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) #0 {
   ret void
 }
 
-declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
-declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
index 3acff52874dd9..6b830dd8ee7a7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
@@ -337,6 +337,4 @@ define i32 @v_udot2_shuffle10_b(<2 x i16> %a, <2 x i16> %b, i32 %c) {
   ret i32 %r
 }
 
-declare i32 @llvm.amdgcn.udot2(<2 x i16>, <2 x i16>, i32, i1 immarg) #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll
index b14af9e043e09..92d72a3042fa7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot4.ll
@@ -148,6 +148,4 @@ define i32 @v_udot4_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
   ret i32 %r
 }
 
-declare i32 @llvm.amdgcn.udot4(i32, i32, i32, i1 immarg) #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll
index a664c8aa508ef..7f5ab1d584bb6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot8.ll
@@ -84,6 +84,4 @@ define i32 @v_udot8_fnegv2f16_a(<2 x half> %a, i32 %b, i32 %c) {
   ret i32 %r
 }
 
-declare i32 @llvm.amdgcn.udot8(i32, i32, i32, i1 immarg) #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
index f7adfe47b64f2..79beb9f70d4bf 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
@@ -98,9 +98,5 @@ define amdgpu_kernel void @update_dpp64_test(ptr addrspace(1) %arg, i64 %in1, i6
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #1
-declare i64 @llvm.amdgcn.update.dpp.i64(i64, i64, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #1
-
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { convergent nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll
index 4e44ef33cd13f..e938b5f8d21e0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_32.ll
@@ -1,15 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=W32
 
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<16 x half>, <16 x half> , <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16(<16 x i16>, <16 x i16> , <8 x float>)
-declare <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half>, <16 x half> , <16 x half>, i1 immarg)
-declare <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied(<16 x half>, <16 x half> , <16 x half>, i1 immarg)
-declare <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16>, <16 x i16> , <16 x i16>, i1 immarg)
-declare <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied(<16 x i16>, <16 x i16> , <16 x i16>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 immarg, <4 x i32>, i1 immarg, <4 x i32> , <8 x i32>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 immarg, <2 x i32>, i1 immarg, <2 x i32> , <8 x i32>, i1 immarg)
-
 ; @llvm.amdgcn.wmma.f32.16x16x16.f16
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll
index a72b7e4f36396..798a4603811d4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wmma_64.ll
@@ -1,15 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=W64
 
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<16 x half>, <16 x half>, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16(<16 x i16>, <16 x i16>, <4 x float>)
-declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half>, <16 x half>, <8 x half>, i1 immarg)
-declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied(<16 x half>, <16 x half>, <8 x half>, i1 immarg)
-declare <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16>, <16 x i16>, <8 x i16>, i1 immarg)
-declare <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied(<16 x i16>, <16 x i16>, <8 x i16>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 immarg, <4 x i32>, i1 immarg, <4 x i32>, <4 x i32>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <4 x i32>, i1 immarg)
-
 ; @llvm.amdgcn.wmma.f32.16x16x16.f16
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll
index df201c1903b64..dfd6194569b24 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workgroup.id.ll
@@ -3,10 +3,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s
 ; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s
 
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
-
 ; ALL-LABEL: {{^}}test_workgroup_id_x:
 
 ; MESA3D: .amd_kernel_code_t
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
index 2e62d13f1e69a..ca0e11eb50bec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
@@ -8,10 +8,6 @@
 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s
 ; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
-
 ; MESA: .section .AMDGPU.config
 ; MESA: .long 47180
 ; MESA-NEXT: .long 132{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
index a36b25ccfa48e..800842d30e495 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.demote.ll
@@ -1179,16 +1179,6 @@ define amdgpu_ps void @wqm_deriv_loop(<2 x float> %input, float %arg, i32 %index
   ret void
 }
 
-declare void @llvm.amdgcn.wqm.demote(i1) #0
-declare i1 @llvm.amdgcn.live.mask() #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare float @llvm.amdgcn.wqm.f32(float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #2
-declare float @llvm.amdgcn.interp.p2(float, float, i32 immarg, i32 immarg, i32) #2
-declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) #3
-declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #4
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll
index edc93f4effb82..18f6895c27432 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wqm.ll
@@ -77,10 +77,4 @@ define amdgpu_ps <3 x float> @wqm_v3f32(<3 x float> %val) {
   ret <3 x float> %ret
 }
 
-declare i1 @llvm.amdgcn.wqm.i1(i1) #0
-declare float @llvm.amdgcn.wqm.f32(float) #0
-declare <2 x half> @llvm.amdgcn.wqm.v2f16(<2 x half>) #0
-declare <3 x float> @llvm.amdgcn.wqm.v3f32(<3 x float>) #0
-declare double @llvm.amdgcn.wqm.f64(double) #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.writelane.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.writelane.ll
index 17f3dd7398dea..0f40c6cf16b5d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.writelane.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.writelane.ll
@@ -314,9 +314,6 @@ define amdgpu_ps float @test_writelane_s_m0_v(i32 inreg %data, i32 %vdst.in) #0
   ret float %writelane.cast
 }
 
-declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
 attributes #0 = { nounwind }
 attributes #1 = { convergent nounwind readnone willreturn }
 attributes #2 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll
index bf48683b0bec9..62b12b98a989d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.wwm.ll
@@ -155,15 +155,4 @@ define amdgpu_ps <3 x float> @strict_wwm_v3f32(<3 x float> %val) {
   ret <3 x float> %ret
 }
 
-declare i1 @llvm.amdgcn.wwm.i1(i1) #0
-declare float @llvm.amdgcn.wwm.f32(float) #0
-declare <2 x half> @llvm.amdgcn.wwm.v2f16(<2 x half>) #0
-declare <3 x float> @llvm.amdgcn.wwm.v3f32(<3 x float>) #0
-declare double @llvm.amdgcn.wwm.f64(double) #0
-declare i1 @llvm.amdgcn.strict.wwm.i1(i1) #0
-declare float @llvm.amdgcn.strict.wwm.f32(float) #0
-declare <2 x half> @llvm.amdgcn.strict.wwm.v2f16(<2 x half>) #0
-declare <3 x float> @llvm.amdgcn.strict.wwm.v3f32(<3 x float>) #0
-declare double @llvm.amdgcn.strict.wwm.f64(double) #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll
index dfc9995fdbc99..a40926c884f60 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.inline.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn -verify-machineinstrs -mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=GCN %s
 ; RUN: llc -global-isel -mtriple=amdgcn -verify-machineinstrs -mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=GCN %s
 
-declare void @llvm.memcpy.inline.p1.p1.i32(ptr addrspace(1), ptr addrspace(1), i32, i1 immarg)
-
 define amdgpu_cs void @test(ptr addrspace(1) %dst, ptr addrspace(1) %src) {
 ; GCN-LABEL: test:
 ; GCN:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll
index 6b05455613515..a3348627e3880 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memcpy.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=19 %s -o - | FileCheck -check-prefix=LOOP %s
 ; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=21 %s -o - | FileCheck -check-prefix=UNROLL %s
 
-declare void @llvm.memcpy.p1.p1.i32(ptr addrspace(1), ptr addrspace(1), i32, i1 immarg)
-
 define amdgpu_cs void @memcpy_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src) {
 ; LOOP-LABEL: memcpy_p1i8:
 ; LOOP:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll
index 4d4da869d7507..a31afb94b1fee 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memmove.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=LOOP %s
 ; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=UNROLL %s
 
-declare void @llvm.memmove.p1.p1.i32(ptr addrspace(1), ptr addrspace(1), i32, i1)
-
 define amdgpu_cs void @memmove_p1i8(ptr addrspace(1) %dst, ptr addrspace(1) %src) {
 ; LOOP-LABEL: memmove_p1i8:
 ; LOOP:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll
index 3edd2e0914a6e..b7a5bada515f2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.memset.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=3 %s -o - | FileCheck -check-prefix=LOOP %s
 ; RUN: llc -global-isel -mtriple=amdgcn-- -verify-machineinstrs -mem-intrinsic-expand-size=5 %s -o - | FileCheck -check-prefix=UNROLL %s
 
-declare void @llvm.memset.p1.i32(ptr addrspace(1), i8, i32, i1)
-
 define amdgpu_cs void @memset_p1i8(ptr addrspace(1) %dst, i8 %val) {
 ; LOOP-LABEL: memset_p1i8:
 ; LOOP:       ; %bb.0: ; %loadstoreloop.preheader
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
index b169063d67872..e832d88ac003c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.powi.ll
@@ -554,8 +554,4 @@ define float @v_powi_neg128_f32(float %l) {
 ;   ret double %res
 ; }
 
-declare half @llvm.powi.f16.i32(half, i32) #0
-declare float @llvm.powi.f32.i32(float, i32) #0
-declare double @llvm.powi.f64.i32(double, i32) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll
index 9e58b716adb1a..6a48b40e23ae9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll
@@ -142,6 +142,3 @@ define amdgpu_cs void @test2_ptr(i32 %arg1, ptr addrspace(8) inreg %arg2) {
 
   ret void
 }
-
-declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/minmaxabs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/minmaxabs.ll
index 0f84f12a12574..f2acb77e7ca83 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/minmaxabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/minmaxabs.ll
@@ -1,16 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck %s
 
-declare i32 @llvm.umin.i32(i32, i32)
-declare i32 @llvm.umax.i32(i32, i32)
-declare i32 @llvm.smin.i32(i32, i32)
-declare i32 @llvm.smax.i32(i32, i32)
-
-declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
-declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
-
 define i32 @test_umin_i32(i32 %a, i32 %b) {
 ; CHECK-LABEL: test_umin_i32:
 ; CHECK:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll
index 1140ef88ac7f8..5eba7699ac654 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll
@@ -1,7 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN:  llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GFX10 %s
 ; RUN:  llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GFX11 %s
-declare i32 @llvm.amdgcn.workitem.id.x()
 
 ; A 64-bit multiplication where no arguments were zero extended.
 define amdgpu_kernel void @v_mul_i64_no_zext(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
index b940dc74839b2..cb9a31090a6b7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
@@ -262,8 +262,6 @@ bb.1:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone speculatable }
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir
index 87ba9259106f0..f9fe6c762b37f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn-exp.mir
@@ -11,8 +11,6 @@
     call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 1.0, float 1.0, float 1.0, float 1.0, i1 0, i1 0)
     ret void
   }
-
-  declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
 ...
 
 ---
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
index 61263e0efa2ea..c15665e1dac3f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.load.1d.ll
@@ -358,6 +358,4 @@ define amdgpu_ps void @load_1d_sgpr_vaddr__vgpr_srsrc(<8 x i32> %rsrc, i32 inreg
   ret void
 }
 
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
index d6a7ae8d867fe..300e791700606 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.image.sample.1d.ll
@@ -534,6 +534,4 @@ define amdgpu_ps void @sample_1d_vgpr_vaddr__vgpr_rsrc__vgpr_samp(<8 x i32> %rsr
   ret void
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
index 8c7bdb867d168..41afd08da750e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.buffer.load.ll
@@ -208,5 +208,3 @@ define amdgpu_ps float @raw_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
   %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret float %val
 }
-
-declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.ptr.buffer.load.ll
index 19793f7020dc8..18d8f08079fe9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.ptr.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.raw.ptr.buffer.load.ll
@@ -208,5 +208,3 @@ define amdgpu_ps float @raw_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_voffset__
   %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret float %val
 }
-
-declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
index ccf3647060e42..f084318526c14 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.ll
@@ -3215,30 +3215,3 @@ define amdgpu_ps float @s_buffer_load_f32_offset_add_imm_vgpr_sgpr(<4 x i32> inr
   %val = call float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %rsrc, i32 %offset, i32 0)
   ret float %val
 }
-
-declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
-declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg)
-declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)
-declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg)
-declare <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32>, i32, i32 immarg)
-declare <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32>, i32, i32 immarg)
-
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32      immarg)
-declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32 immarg)
-declare <3 x float> @llvm.amdgcn.s.buffer.load.v3f32(<4 x i32>, i32, i32 immarg)
-declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32 immarg)
-declare <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32>, i32, i32 immarg)
-declare <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32>, i32, i32 immarg)
-
-declare i96 @llvm.amdgcn.s.buffer.load.i96(<4 x i32>, i32, i32 immarg)
-declare i256 @llvm.amdgcn.s.buffer.load.i256(<4 x i32>, i32, i32 immarg)
-declare i512 @llvm.amdgcn.s.buffer.load.i512(<4 x i32>, i32, i32 immarg)
-
-declare <16 x i16> @llvm.amdgcn.s.buffer.load.v16i16(<4 x i32>, i32, i32 immarg)
-declare <32 x i16> @llvm.amdgcn.s.buffer.load.v32i16(<4 x i32>, i32, i32 immarg)
-
-declare <4 x i64> @llvm.amdgcn.s.buffer.load.v4i64(<4 x i32>, i32, i32 immarg)
-declare <8 x i64> @llvm.amdgcn.s.buffer.load.v8i64(<4 x i32>, i32, i32 immarg)
-
-declare <4 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v4p1(<4 x i32>, i32, i32 immarg)
-declare <8 x ptr addrspace(1)> @llvm.amdgcn.s.buffer.load.v8p1(<4 x i32>, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
index f664e62761ad5..2934446ff3532 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.load.ll
@@ -204,5 +204,3 @@ define amdgpu_ps float @struct_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
   %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret float %val
 }
-
-declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
index 0f72586ed6c12..6cb849c208b83 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.buffer.store.ll
@@ -205,5 +205,3 @@ define amdgpu_ps void @struct_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex__vg
   call void @llvm.amdgcn.struct.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.load.ll
index b835b3a3e380b..3aebdcba991e6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.load.ll
@@ -204,5 +204,3 @@ define amdgpu_ps float @struct_ptr_buffer_load__vgpr_rsrc__vgpr_val__vgpr_vindex
   %val = call float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret float %val
 }
-
-declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.store.ll
index 0cefc373dd7cd..a6d36281b9597 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.struct.ptr.buffer.store.ll
@@ -205,5 +205,3 @@ define amdgpu_ps void @struct_ptr_buffer_store__vgpr_rsrc__vgpr_val__vgpr_vindex
   call void @llvm.amdgcn.struct.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.struct.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
index 74af51f0c1676..19cda9d1a26fb 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir
@@ -102,8 +102,6 @@
   define amdgpu_kernel void @load_constant_i96_align4() { ret void }
   define amdgpu_kernel void @load_constant_i96_align8() { ret void }
   define amdgpu_kernel void @load_constant_i96_align16() { ret void }
-
-  declare i32 @llvm.amdgcn.workitem.id.x() #0
   attributes #0 = { nounwind readnone }
 ...
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
index c50187f594901..e7264e489de8a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect.mir
@@ -47,8 +47,6 @@
   define amdgpu_kernel void @load_constant_v4i16_from_8_align8(ptr addrspace(4) %ptr0) {
     ret void
   }
-
-  declare i32 @llvm.amdgcn.workitem.id.x() #0
   attributes #0 = { nounwind readnone }
   !0 = !{}
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index a6f9bb7ee055d..079c5b49c2916 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -6313,34 +6313,4 @@ define amdgpu_ps <2 x i128> @s_saddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
   ret <2 x i128> %result
 }
 
-declare i7 @llvm.sadd.sat.i7(i7, i7) #0
-declare i8 @llvm.sadd.sat.i8(i8, i8) #0
-declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>) #0
-declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>) #0
-
-declare i16 @llvm.sadd.sat.i16(i16, i16) #0
-declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.sadd.sat.v3i16(<3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>) #0
-declare <5 x i16> @llvm.sadd.sat.v5i16(<5 x i16>, <5 x i16>) #0
-declare <6 x i16> @llvm.sadd.sat.v6i16(<6 x i16>, <6 x i16>) #0
-declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) #0
-
-declare i24 @llvm.sadd.sat.i24(i24, i24) #0
-
-declare i32 @llvm.sadd.sat.i32(i32, i32) #0
-declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.sadd.sat.v3i32(<3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>) #0
-declare <5 x i32> @llvm.sadd.sat.v5i32(<5 x i32>, <5 x i32>) #0
-declare <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32>, <16 x i32>) #0
-
-declare i48 @llvm.sadd.sat.i48(i48, i48) #0
-
-declare i64 @llvm.sadd.sat.i64(i64, i64) #0
-declare <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64>, <2 x i64>) #0
-
-declare i128 @llvm.sadd.sat.i128(i128, i128) #0
-declare <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128>, <2 x i128>) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
index 1061f0003bd48..1b2c421d34141 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i32.ll
@@ -75,7 +75,6 @@ define i32 @v_sdiv_i32(i32 %num, i32 %den) {
 }
 
 ; FIXME: This is a workaround for not handling uniform VGPR case.
-declare i32 @llvm.amdgcn.readfirstlane(i32)
 
 define amdgpu_ps i32 @s_sdiv_i32(i32 inreg %num, i32 inreg %den) {
 ; GISEL-LABEL: s_sdiv_i32:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 84906c01a4698..240ee7cfd32ce 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -189,7 +189,6 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) {
 }
 
 ; FIXME: This is a workaround for not handling uniform VGPR case.
-declare i32 @llvm.amdgcn.readfirstlane(i32)
 
 define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-LABEL: s_sdiv_i64:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
index 7ad19a4797003..f78e783443728 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
@@ -721,6 +721,4 @@ define <2 x i32> @v_shl_v2i32_zext_v2i16(<2 x i16> %x) {
   ret <2 x i32> %shl
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
index dc13dee4f148a..9fd4b86ea4161 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/smed3.ll
@@ -204,8 +204,3 @@ define i32 @test_non_inline_constant_i32(i32 %a) {
   %smed = call i32 @llvm.smin.i32(i32 %smax, i32 65)
   ret i32 %smed
 }
-
-declare i32 @llvm.smin.i32(i32, i32)
-declare i32 @llvm.smax.i32(i32, i32)
-declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)
-declare <2 x i16> @llvm.smax.v2i16(<2 x i16>, <2 x i16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
index 1bb606f36e48d..f38ed1b22e267 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i32.ll
@@ -69,7 +69,6 @@ define i32 @v_srem_i32(i32 %num, i32 %den) {
 }
 
 ; FIXME: This is a workaround for not handling uniform VGPR case.
-declare i32 @llvm.amdgcn.readfirstlane(i32)
 
 define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
 ; GISEL-LABEL: s_srem_i32:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 83ebc84e1f84a..79a4472ec8ac0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -183,7 +183,6 @@ define i64 @v_srem_i64(i64 %num, i64 %den) {
 }
 
 ; FIXME: This is a workaround for not handling uniform VGPR case.
-declare i32 @llvm.amdgcn.readfirstlane(i32)
 
 define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-LABEL: s_srem_i64:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 320dfbb4980e4..17c6c64b70343 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -6381,34 +6381,4 @@ define amdgpu_ps <2 x i128> @s_ssubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
   ret <2 x i128> %result
 }
 
-declare i7 @llvm.ssub.sat.i7(i7, i7) #0
-declare i8 @llvm.ssub.sat.i8(i8, i8) #0
-declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>) #0
-declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>) #0
-
-declare i16 @llvm.ssub.sat.i16(i16, i16) #0
-declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) #0
-declare <5 x i16> @llvm.ssub.sat.v5i16(<5 x i16>, <5 x i16>) #0
-declare <6 x i16> @llvm.ssub.sat.v6i16(<6 x i16>, <6 x i16>) #0
-declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) #0
-
-declare i24 @llvm.ssub.sat.i24(i24, i24) #0
-
-declare i32 @llvm.ssub.sat.i32(i32, i32) #0
-declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) #0
-declare <5 x i32> @llvm.ssub.sat.v5i32(<5 x i32>, <5 x i32>) #0
-declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>) #0
-
-declare i48 @llvm.ssub.sat.i48(i48, i48) #0
-
-declare i64 @llvm.ssub.sat.i64(i64, i64) #0
-declare <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64>, <2 x i64>) #0
-
-declare i128 @llvm.ssub.sat.i128(i128, i128) #0
-declare <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128>, <2 x i128>) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f16.ll
index 15740ee5476e8..674a2c40c6138 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f16.ll
@@ -149,10 +149,4 @@ define <2 x half> @v_constained_fma_v2f16_fpexcept_strict_fneg_fneg(<2 x half> %
   ret <2 x half> %val
 }
 
-declare half @llvm.fabs.f16(half)
-declare half @llvm.experimental.constrained.fma.f16(half, half, half, metadata, metadata)
-declare <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, metadata, metadata)
-declare <3 x half> @llvm.experimental.constrained.fma.v3f16(<3 x half>, <3 x half>, <3 x half>, metadata, metadata)
-declare <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, metadata, metadata)
-
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f32.ll
index 5955c590e9d1c..e89d47c6970bc 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f32.ll
@@ -95,10 +95,4 @@ define <2 x float> @v_constained_fma_v2f32_fpexcept_strict_fneg_fneg(<2 x float>
   ret <2 x float> %val
 }
 
-declare float @llvm.fabs.f32(float)
-declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
-declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float>, <3 x float>, <3 x float>, metadata, metadata)
-declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
-
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f64.ll
index 04a07c42c934c..fea56c194fb86 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/strict_fma.f64.ll
@@ -95,10 +95,4 @@ define <2 x double> @v_constained_fma_v2f64_fpexcept_strict_fneg_fneg(<2 x doubl
   ret <2 x double> %val
 }
 
-declare double @llvm.fabs.f64(double)
-declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.fma.v3f64(<3 x double>, <3 x double>, <3 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata)
-
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll
index 3741983a3067b..3be8b5a730722 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll
@@ -1058,17 +1058,3 @@ define amdgpu_ps i16 @ssubo_i16_sv(i16 inreg %a, i16 %b) {
   %ret = sub i16 %sub, %of.zext
   ret i16 %ret
 }
-
-declare {i7, i1} @llvm.usub.with.overflow.i7(i7 %a, i7 %b)
-declare {i8, i1} @llvm.usub.with.overflow.i8(i8 %a, i8 %b)
-declare {i16, i1} @llvm.usub.with.overflow.i16(i16 %a, i16 %b)
-declare {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
-declare {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
-declare {<2 x i32>, <2 x i1>} @llvm.usub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b)
-
-declare {i7, i1} @llvm.ssub.with.overflow.i7(i7 %a, i7 %b)
-declare {i8, i1} @llvm.ssub.with.overflow.i8(i8 %a, i8 %b)
-declare {i16, i1} @llvm.ssub.with.overflow.i16(i16 %a, i16 %b)
-declare {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b)
-declare {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
-declare {<2 x i32>, <2 x i1>} @llvm.ssub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
index 1821d29d4b050..a5873df49fbb9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
@@ -3433,34 +3433,4 @@ define amdgpu_ps <2 x i128> @s_uaddsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
   ret <2 x i128> %result
 }
 
-declare i7 @llvm.uadd.sat.i7(i7, i7) #0
-declare i8 @llvm.uadd.sat.i8(i8, i8) #0
-declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>) #0
-declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>) #0
-
-declare i16 @llvm.uadd.sat.i16(i16, i16) #0
-declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>) #0
-declare <5 x i16> @llvm.uadd.sat.v5i16(<5 x i16>, <5 x i16>) #0
-declare <6 x i16> @llvm.uadd.sat.v6i16(<6 x i16>, <6 x i16>) #0
-declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>) #0
-
-declare i24 @llvm.uadd.sat.i24(i24, i24) #0
-
-declare i32 @llvm.uadd.sat.i32(i32, i32) #0
-declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.uadd.sat.v3i32(<3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) #0
-declare <5 x i32> @llvm.uadd.sat.v5i32(<5 x i32>, <5 x i32>) #0
-declare <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32>, <16 x i32>) #0
-
-declare i48 @llvm.uadd.sat.i48(i48, i48) #0
-
-declare i64 @llvm.uadd.sat.i64(i64, i64) #0
-declare <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64>, <2 x i64>) #0
-
-declare i128 @llvm.uadd.sat.i128(i128, i128) #0
-declare <2 x i128> @llvm.uadd.sat.v2i128(<2 x i128>, <2 x i128>) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
index cd01148fa7dd7..09fb1778f456b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i32.ll
@@ -57,7 +57,6 @@ define i32 @v_udiv_i32(i32 %num, i32 %den) {
 }
 
 ; FIXME: This is a workaround for not handling uniform VGPR case.
-declare i32 @llvm.amdgcn.readfirstlane(i32)
 
 define amdgpu_ps i32 @s_udiv_i32(i32 inreg %num, i32 inreg %den) {
 ; GISEL-LABEL: s_udiv_i32:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index d15551365707b..387e519b0f49e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -181,7 +181,6 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) {
 }
 
 ; FIXME: This is a workaround for not handling uniform VGPR case.
-declare i32 @llvm.amdgcn.readfirstlane(i32)
 
 define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-LABEL: s_udiv_i64:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
index 7e38762e7b559..b0009de117ab0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/umed3.ll
@@ -204,8 +204,3 @@ define i32 @test_non_inline_constant_u32(i32 %a) {
   %umed = call i32 @llvm.umin.i32(i32 %umax, i32 65)
   ret i32 %umed
 }
-
-declare i32 @llvm.umin.i32(i32, i32)
-declare i32 @llvm.umax.i32(i32, i32)
-declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>)
-declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll
index 82a15f7497f52..6ee9951e41e7c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/unsupported-ptr-add.ll
@@ -10,5 +10,3 @@ body:
   ret float %res
 }
 
-declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg)
-
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
index 31f61b9968b8b..0886a37ffeeb4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i32.ll
@@ -53,7 +53,6 @@ define i32 @v_urem_i32(i32 %num, i32 %den) {
 }
 
 ; FIXME: This is a workaround for not handling uniform VGPR case.
-declare i32 @llvm.amdgcn.readfirstlane(i32)
 
 define amdgpu_ps i32 @s_urem_i32(i32 inreg %num, i32 inreg %den) {
 ; GISEL-LABEL: s_urem_i32:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 07480a0ce0c2e..6fdfbd323c468 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -178,7 +178,6 @@ define i64 @v_urem_i64(i64 %num, i64 %den) {
 }
 
 ; FIXME: This is a workaround for not handling uniform VGPR case.
-declare i32 @llvm.amdgcn.readfirstlane(i32)
 
 define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-LABEL: s_urem_i64:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
index a60370cd460f9..04ffbad20b898 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
@@ -3301,34 +3301,4 @@ define amdgpu_ps <2 x i128> @s_usubsat_v2i128(<2 x i128> inreg %lhs, <2 x i128>
   ret <2 x i128> %result
 }
 
-declare i7 @llvm.usub.sat.i7(i7, i7) #0
-declare i8 @llvm.usub.sat.i8(i8, i8) #0
-declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>) #0
-declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>) #0
-
-declare i16 @llvm.usub.sat.i16(i16, i16) #0
-declare <2 x i16> @llvm.usub.sat.v2i16(<2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.usub.sat.v3i16(<3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.usub.sat.v4i16(<4 x i16>, <4 x i16>) #0
-declare <5 x i16> @llvm.usub.sat.v5i16(<5 x i16>, <5 x i16>) #0
-declare <6 x i16> @llvm.usub.sat.v6i16(<6 x i16>, <6 x i16>) #0
-declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) #0
-
-declare i24 @llvm.usub.sat.i24(i24, i24) #0
-
-declare i32 @llvm.usub.sat.i32(i32, i32) #0
-declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.usub.sat.v3i32(<3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) #0
-declare <5 x i32> @llvm.usub.sat.v5i32(<5 x i32>, <5 x i32>) #0
-declare <16 x i32> @llvm.usub.sat.v16i32(<16 x i32>, <16 x i32>) #0
-
-declare i48 @llvm.usub.sat.i48(i48, i48) #0
-
-declare i64 @llvm.usub.sat.i64(i64, i64) #0
-declare <2 x i64> @llvm.usub.sat.v2i64(<2 x i64>, <2 x i64>) #0
-
-declare i128 @llvm.usub.sat.i128(i128, i128) #0
-declare <2 x i128> @llvm.usub.sat.v2i128(<2 x i128>, <2 x i128>) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
index e500aae7e0f3c..56085a0d60fec 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
@@ -488,17 +488,3 @@ bb:
   store <8 x half> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
-declare <8 x float> @llvm.fabs.v8f32(<8 x float>)
-declare float @llvm.fabs.f32(float)
-
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f16.v8f32(<8 x half>, <8 x half>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8i16.v8f32(<8 x i16>, <8 x i16>, <8 x float>)
-declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half>, <8 x half>, <8 x half>, i1 immarg)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v2i32.v8f32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v2i32.v8f32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v2i32.v8f32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v2i32.v8f32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f16.v16f16.v8f32.i16(<8 x half>, <16 x half>, <8 x float>, i16)
-declare <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v16f16.v8f16.i16(<8 x half>, <16 x half>, <8 x half>, i16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll
index 3037c1ec2829e..aa01949e93171 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-imm.ll
@@ -494,26 +494,3 @@ bb:
   store <8 x i32> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16.v8f16.v8f32(<8 x half>, <8 x half>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16.v8i16.v8f32(<8 x i16>, <8 x i16>, <8 x float>)
-declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16.v8f16.v8f16(<8 x half>, <8 x half>, <8 x half>, i1 immarg)
-declare <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16.v8i16.v8i16(<8 x i16>, <8 x i16>, <8 x i16>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32.v2i32.v8i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <8 x i32>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32.i32.v8i32(i1 immarg, i32, i1 immarg, i32, <8 x i32>, i1 immarg)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32.v2i32.v8f32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32.v2i32.v8f32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32.v2i32.v8f32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32.v2i32.v8f32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32.v2i32.v8i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <8 x i32>, i1 immarg)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.v8f32.i16(<8 x half>, <16 x half>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.v8f32.i16(<8 x i16>, <16 x i16>, <8 x float>, i16)
-declare <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.v8f16.i16(<8 x half>, <16 x half>, <8 x half>, i16)
-declare <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.v8i16.i16(<8 x i16>, <16 x i16>, <8 x i16>, i16)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.v8i32.i16(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i16 %Index, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.v8i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <8 x i32>, i16 %Index, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.v8i32.i32(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i32 %Index, i1 immarg)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll
index 086144873a042..92b9b66500c63 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-iu-modifiers.ll
@@ -300,10 +300,3 @@ bb:
   store <8 x i32> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32.v2i32.v8i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <8 x i32>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32.i32.v8i32(i1 immarg, i32, i1 immarg, i32, <8 x i32>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32.v2i32.v8i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <8 x i32>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.v8i32.i16(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i16 %Index, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.v8i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <8 x i32>, i16 %Index, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.v8i32.i32(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i32 %Index, i1 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll
index a6e1f5ef12b4b..f79d5135beccd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32-swmmac-index_key.ll
@@ -308,14 +308,3 @@ bb:
   store <8 x float> %res1, ptr addrspace(1) %out1
   ret void
 }
-
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.v8f32.i16(<8 x half>, <16 x half>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.v8f32.i16(<8 x i16>, <16 x i16>, <8 x float>, i16)
-declare <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.v8f16.i16(<8 x half>, <16 x half>, <8 x half>, i16)
-declare <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.v8i16.i16(<8 x i16>, <16 x i16>, <8 x i16>, i16)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.v8i32.i16(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i16 %Index, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.v8i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <8 x i32>, i16 %Index, i1 immarg)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll
index 3aa81da317d67..ae4d6956f2a1a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w32.ll
@@ -345,26 +345,3 @@ bb:
   store <8 x float> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16.v8f16.v8f32(<8 x half>, <8 x half>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16.v8i16.v8f32(<8 x i16>, <8 x i16>, <8 x float>)
-declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16.v8f16.v8f16(<8 x half>, <8 x half>, <8 x half>, i1 immarg)
-declare <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16.v8i16.v8i16(<8 x i16>, <8 x i16>, <8 x i16>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32.v2i32.v8i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <8 x i32>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32.i32.v8i32(i1 immarg, i32, i1 immarg, i32, <8 x i32>, i1 immarg)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32.v2i32.v8f32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32.v2i32.v8f32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32.v2i32.v8f32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32.v2i32.v8f32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32.v2i32.v8i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <8 x i32>, i1 immarg)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.v8f32.i16(<8 x half>, <16 x half>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.v8f32.i16(<8 x i16>, <16 x i16>, <8 x float>, i16)
-declare <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.v8f16.i16(<8 x half>, <16 x half>, <8 x half>, i16)
-declare <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.v8i16.i16(<8 x i16>, <16 x i16>, <8 x i16>, i16)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.v8i32.i16(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i16 %Index, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.v8i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <8 x i32>, i16 %Index, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.v8i32.i32(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i32 %Index, i1 immarg)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.v8f32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
index 6c232b680ebf5..59803c4d0a0f3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
@@ -443,17 +443,3 @@ bb:
   store <4 x half> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
-declare float @llvm.fabs.f32(float)
-
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f16.v4f32(<4 x half>, <4 x half>, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4i16.v4f32(<4 x i16>, <4 x i16>, <4 x float>)
-declare <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half>, <4 x half>, <4 x half>, i1 immarg)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.i32.v4f32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.i32.v4f32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.i32.v4f32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.i32.v4f32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f16.v8f16.v4f32.i16(<4 x half>, <8 x half>, <4 x float>, i16)
-declare <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v8f16.v4f16.i16(<4 x half>, <8 x half>, <4 x half>, i16)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll
index 717a4fc823d51..59e1705365cb7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-imm.ll
@@ -416,15 +416,3 @@ bb:
   store <4 x i32> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16.v4f16.v4f32(<4 x half>, <4 x half>, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16.v4i16.v4f32(<4 x i16>, <4 x i16>, <4 x float>)
-declare <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16.v4f16.v4f16(<4 x half>, <4 x half>, <4 x half>, i1 immarg)
-declare <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16.v4i16.v4i16(<4 x i16>, <4 x i16>, <4 x i16>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32.i32.v4i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32.i32.v4i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32.i32.v4f32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32.i32.v4f32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32.i32.v4f32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32.i32.v4f32(i32, i32, <4 x float>)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32.i32.v4i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll
index 1ef50cbd0fc7e..58b23653759d9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-iu-modifiers.ll
@@ -265,10 +265,3 @@ bb:
   store <4 x i32> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32.i32.v4i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32.i32.v4i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32.i32.v4i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.v4i32.i8(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i8 %Index, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.v4i32.i16(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i16 %Index, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.v4i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i16 %Index, i1 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll
index 0bd255e5e1af4..440741ae8a13c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64-swmmac-index_key.ll
@@ -458,15 +458,3 @@ bb:
   store <4 x float> %res3, ptr addrspace(1) %out3
   ret void
 }
-
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.v4f32.i8(<4 x half>, <8 x half>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.v4f32.i8(<4 x i16>, <8 x i16>, <4 x float>, i8)
-declare <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.v4f16.i8(<4 x half>, <8 x half>, <4 x half>, i8)
-declare <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.v4i16.i8(<4 x i16>, <8 x i16>, <4 x i16>, i8)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.v4i32.i8(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i8 %Index, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.v4i32.i16(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i16 %Index, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.v4i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i16 %Index, i1 immarg)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, i8)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll
index 7399fa0a341e2..cf7a1539ca6f1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/wmma-gfx12-w64.ll
@@ -308,26 +308,3 @@ bb:
   store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16.v4f16.v4f32(<4 x half>, <4 x half>, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16.v4i16.v4f32(<4 x i16>, <4 x i16>, <4 x float>)
-declare <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16.v4f16.v4f16(<4 x half>, <4 x half>, <4 x half>, i1 immarg)
-declare <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16.v4i16.v4i16(<4 x i16>, <4 x i16>, <4 x i16>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32.i32.v4i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32.i32.v4i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32.i32.v4f32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32.i32.v4f32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32.i32.v4f32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32.i32.v4f32(i32, i32, <4 x float>)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32.i32.v4i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.v4f32.i8(<4 x half>, <8 x half>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.v4f32.i8(<4 x i16>, <8 x i16>, <4 x float>, i8)
-declare <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.v4f16.i8(<4 x half>, <8 x half>, <4 x half>, i8)
-declare <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.v4i16.i8(<4 x i16>, <8 x i16>, <4 x i16>, i8)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.v4i32.i8(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i8 %Index, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.v4i32.i16(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i16 %Index, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.v4i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i16 %Index, i1 immarg)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.v4f32.i8(i32, <2 x i32>, <4 x float>, i8)
diff --git a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
index ae20ab1de3a2d..0b282c15b4520 100644
--- a/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
+++ b/llvm/test/CodeGen/AMDGPU/abi-attribute-hints-undefined-behavior.ll
@@ -384,21 +384,6 @@ define void @debugtrap_requires_queue() #0 {
   unreachable
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.amdgcn.workitem.id.y()
-declare i32 @llvm.amdgcn.workitem.id.z()
-declare i32 @llvm.amdgcn.workgroup.id.x()
-declare i32 @llvm.amdgcn.workgroup.id.y()
-declare i32 @llvm.amdgcn.workgroup.id.z()
-declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr()
-declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-declare i64 @llvm.amdgcn.dispatch.id()
-declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-declare i1 @llvm.amdgcn.is.shared(ptr)
-declare i1 @llvm.amdgcn.is.private(ptr)
-declare void @llvm.trap()
-declare void @llvm.debugtrap()
-
 attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-work-group-id-x" "amdgpu-no-work-group-id-y" "amdgpu-no-work-group-id-z" "amdgpu-no-work-item-id-x" "amdgpu-no-work-item-id-y" "amdgpu-no-work-item-id-z" }
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/acc-ldst.ll b/llvm/test/CodeGen/AMDGPU/acc-ldst.ll
index be4d6a2c27895..c475d10ff8c70 100644
--- a/llvm/test/CodeGen/AMDGPU/acc-ldst.ll
+++ b/llvm/test/CodeGen/AMDGPU/acc-ldst.ll
@@ -1,10 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -early-live-intervals < %s | FileCheck -enable-var-scope --check-prefix=GCN %s
 
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
-declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 ; GCN-LABEL:  {{^}}test_load_mfma_store16:
 ; GCN-COUNT-8: global_load_dwordx4 a[{{[0-9:]+}}], v{{[0-9:]+}}, s[{{[0-9:]+}}]
 ; GCN-NOT:     v_accvgpr_write
diff --git a/llvm/test/CodeGen/AMDGPU/add.i16.ll b/llvm/test/CodeGen/AMDGPU/add.i16.ll
index 417ff54e56568..65e00743f8354 100644
--- a/llvm/test/CodeGen/AMDGPU/add.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/add.i16.ll
@@ -142,7 +142,5 @@ define amdgpu_kernel void @v_test_add_i16_sext_to_i64(ptr addrspace(1) %out, ptr
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/add.ll b/llvm/test/CodeGen/AMDGPU/add.ll
index 422e2747094ce..64c3abc8f556b 100644
--- a/llvm/test/CodeGen/AMDGPU/add.ll
+++ b/llvm/test/CodeGen/AMDGPU/add.ll
@@ -1424,7 +1424,5 @@ define amdgpu_ps void @add_select_vop3(i32 inreg %s, i32 %v) {
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
index 6f67ce4de9ce5..4bf148eb5c07f 100644
--- a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
@@ -1049,7 +1049,5 @@ define <2 x i16> @add_inline_imm_1_0(<2 x i16> %x) {
   ret <2 x i16> %y
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/add_i1.ll b/llvm/test/CodeGen/AMDGPU/add_i1.ll
index e9e3fa765b52f..ab50054cf0113 100644
--- a/llvm/test/CodeGen/AMDGPU/add_i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/add_i1.ll
@@ -47,5 +47,3 @@ endif:
   store i1 %3, ptr addrspace(1) %out
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/add_i128.ll b/llvm/test/CodeGen/AMDGPU/add_i128.ll
index d2010568df915..786bf58eb4813 100644
--- a/llvm/test/CodeGen/AMDGPU/add_i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/add_i128.ll
@@ -52,5 +52,3 @@ define amdgpu_kernel void @test_sreg(ptr addrspace(1) noalias %out, i128 %a, i12
   store i128 %result, ptr addrspace(1) %out
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x() readnone
diff --git a/llvm/test/CodeGen/AMDGPU/add_i64.ll b/llvm/test/CodeGen/AMDGPU/add_i64.ll
index 9400bf6098552..8841b6a16f2e4 100644
--- a/llvm/test/CodeGen/AMDGPU/add_i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/add_i64.ll
@@ -1,8 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-
-declare i32 @llvm.amdgcn.workitem.id.x() readnone
-
 ; SI-LABEL: {{^}}test_i64_vreg:
 ; SI: v_add_i32
 ; SI: v_addc_u32
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index 66034af5c351f..3410cec94b7d3 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -2,8 +2,6 @@
 ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
 ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s
 
-declare void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) nocapture, ptr addrspace(4) nocapture, i32, i1) #0
-
 @lds.i32 = unnamed_addr addrspace(3) global i32 undef, align 4
 @lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4
 
@@ -18,7 +16,7 @@ declare void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) nocapture, ptr addrspace(4)
 ;.
 define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
 ; HSA-LABEL: define {{[^@]+}}@store_cast_0_flat_to_group_addrspacecast
-; HSA-SAME: () #[[ATTR1:[0-9]+]] {
+; HSA-SAME: () #[[ATTR0:[0-9]+]] {
 ; HSA-NEXT:    store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) null to ptr addrspace(3)), align 4
 ; HSA-NEXT:    ret void
 ;
@@ -28,12 +26,12 @@ define amdgpu_kernel void @store_cast_0_flat_to_group_addrspacecast() #1 {
 
 define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)), align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_cast_0_group_to_flat_addrspacecast
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR2:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR1:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) null to ptr addrspace(4)), align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -43,12 +41,12 @@ define amdgpu_kernel void @store_cast_0_group_to_flat_addrspacecast() #1 {
 
 define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4)), align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_to_flat
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
 ; ATTRIBUTOR_HSA-NEXT:    store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.i32 to ptr addrspace(4)), align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -58,12 +56,12 @@ define amdgpu_kernel void @store_constant_cast_group_gv_to_flat() #1 {
 
 define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
 ; ATTRIBUTOR_HSA-NEXT:    store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -73,7 +71,7 @@ define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat() #1 {
 
 define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() #1 {
 ; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_to_flat
-; HSA-SAME: () #[[ATTR1]] {
+; HSA-SAME: () #[[ATTR0]] {
 ; HSA-NEXT:    store i32 7, ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.i32 to ptr addrspace(4)), align 4
 ; HSA-NEXT:    ret void
 ;
@@ -83,7 +81,7 @@ define amdgpu_kernel void @store_constant_cast_global_gv_to_flat() #1 {
 
 define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() #1 {
 ; HSA-LABEL: define {{[^@]+}}@store_constant_cast_global_gv_gep_to_flat
-; HSA-SAME: () #[[ATTR1]] {
+; HSA-SAME: () #[[ATTR0]] {
 ; HSA-NEXT:    store i32 7, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(1) @global.arr to ptr addrspace(4)), i64 0, i64 8), align 4
 ; HSA-NEXT:    ret void
 ;
@@ -93,13 +91,13 @@ define amdgpu_kernel void @store_constant_cast_global_gv_gep_to_flat() #1 {
 
 define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat
-; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
 ; AKF_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@load_constant_cast_group_gv_gep_to_flat
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), align 4
 ; ATTRIBUTOR_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -111,13 +109,13 @@ define amdgpu_kernel void @load_constant_cast_group_gv_gep_to_flat(ptr addrspace
 
 define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat
-; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst, align 4
 ; AKF_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@atomicrmw_constant_cast_group_gv_gep_to_flat
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = atomicrmw add ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 1 seq_cst, align 4
 ; ATTRIBUTOR_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[OUT]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -129,14 +127,14 @@ define amdgpu_kernel void @atomicrmw_constant_cast_group_gv_gep_to_flat(ptr addr
 
 define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat
-; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
 ; AKF_HSA-NEXT:    [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
 ; AKF_HSA-NEXT:    store i32 [[VAL0]], ptr addrspace(1) [[OUT]], align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@cmpxchg_constant_cast_group_gv_gep_to_flat
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = cmpxchg ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 0, i32 1 seq_cst seq_cst, align 4
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL0:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
 ; ATTRIBUTOR_HSA-NEXT:    store i32 [[VAL0]], ptr addrspace(1) [[OUT]], align 4
@@ -150,12 +148,12 @@ define amdgpu_kernel void @cmpxchg_constant_cast_group_gv_gep_to_flat(ptr addrsp
 
 define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat
-; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 [[OUT]], ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false)
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@memcpy_constant_cast_group_gv_gep_to_flat
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @llvm.memcpy.p1.p4.i32(ptr addrspace(1) align 4 [[OUT]], ptr addrspace(4) align 4 getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), i32 32, i1 false)
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -166,12 +164,12 @@ define amdgpu_kernel void @memcpy_constant_cast_group_gv_gep_to_flat(ptr addrspa
 ; Can't just search the pointer value
 define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat
-; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) [[OUT]], align 8
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_value_constant_cast_lds_gv_gep_to_flat
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
 ; ATTRIBUTOR_HSA-NEXT:    store ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8), ptr addrspace(1) [[OUT]], align 8
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -182,12 +180,12 @@ define amdgpu_kernel void @store_value_constant_cast_lds_gv_gep_to_flat(ptr addr
 ; Can't just search pointer types
 define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat(ptr addrspace(1) %out) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
-; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 8
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
 ; ATTRIBUTOR_HSA-NEXT:    store i64 ptrtoint (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to i64), ptr addrspace(1) [[OUT]], align 8
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -198,12 +196,12 @@ define amdgpu_kernel void @store_ptrtoint_value_constant_cast_lds_gv_gep_to_flat
 ; Cast group to flat, do GEP, cast back to group
 define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)), align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@store_constant_cast_group_gv_gep_to_flat_to_group
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
 ; ATTRIBUTOR_HSA-NEXT:    store i32 7, ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3)), align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -213,11 +211,11 @@ define amdgpu_kernel void @store_constant_cast_group_gv_gep_to_flat_to_group() #
 
 define ptr addrspace(3) @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@ret_constant_cast_group_gv_gep_to_flat_to_group
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR3:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR2:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
 ;
   ret ptr addrspace(3) addrspacecast (ptr addrspace(4) getelementptr ([256 x i32], ptr addrspace(4) addrspacecast (ptr addrspace(3) @lds.arr to ptr addrspace(4)), i64 0, i64 8) to ptr addrspace(3))
@@ -229,13 +227,13 @@ attributes #1 = { nounwind }
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
 ;.
-; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
+; AKF_HSA: attributes #[[ATTR0]] = { nounwind }
+; AKF_HSA: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 ;.
-; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR0]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 ;.
 ; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
index 50423c59eabe9..7375bb841f3be 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -415,9 +415,6 @@ define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(ptr addrspa
   ret void
 }
 
-declare void @llvm.amdgcn.s.barrier() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind convergent }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-cse.ll b/llvm/test/CodeGen/AMDGPU/adjust-writemask-cse.ll
index f6c36b32bb13e..2ddc464a080e4 100644
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-cse.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-cse.ll
@@ -27,6 +27,3 @@ bb:
   %a1 = fadd float %a0, %e2
   ret float %a1
 }
-
-declare <3 x float> @llvm.amdgcn.image.load.2d.v3f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
-declare <2 x float> @llvm.amdgcn.image.load.2d.v2f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
index ec4e2dda10d3a..7cb5ddce7e9e5 100644
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-invalid-copy.ll
@@ -75,10 +75,5 @@ main_body:
   ret void
 }
 
-
-declare <2 x float> @llvm.amdgcn.image.sample.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.getlod.1d.v2f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/adjust-writemask-vectorized.ll b/llvm/test/CodeGen/AMDGPU/adjust-writemask-vectorized.ll
index 30c5ccb109b54..485d720f4a6a9 100644
--- a/llvm/test/CodeGen/AMDGPU/adjust-writemask-vectorized.ll
+++ b/llvm/test/CodeGen/AMDGPU/adjust-writemask-vectorized.ll
@@ -10,5 +10,3 @@ main_body:
   %r = fadd <4 x float> %v, %a
   ret <4 x float> %r
 }
-
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
index 60f61a67ccf0b..f02c6f8e4da89 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -1149,9 +1149,6 @@ define void @no_free_vgprs_at_sgpr_to_agpr_copy(float %v0, float %v1) #0 {
   ret void
 }
 
-declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
 attributes #0 = { "amdgpu-waves-per-eu"="6,6" }
 attributes #1 = { convergent nounwind readnone willreturn }
 attributes #2 = { nounwind readnone willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll b/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll
index fc1326268fd1e..2424b20bf1176 100644
--- a/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll
+++ b/llvm/test/CodeGen/AMDGPU/alignbit-pat.ll
@@ -95,6 +95,4 @@ bb:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/always-uniform.ll b/llvm/test/CodeGen/AMDGPU/always-uniform.ll
index 0c5e1ec0d5b6f..4d834569d20d3 100644
--- a/llvm/test/CodeGen/AMDGPU/always-uniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/always-uniform.ll
@@ -1,9 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
 ; RUN: llc -mtriple amdgcn-amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.amdgcn.readfirstlane(i32)
-
 define amdgpu_kernel void @readfirstlane_uniform(ptr addrspace(1) noalias nocapture readonly, ptr addrspace(1) noalias nocapture readonly) {
 ; GCN-LABEL: readfirstlane_uniform:
 ; GCN:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll
index 330cf48803680..145975c4f700c 100644
--- a/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll
+++ b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll
@@ -99,5 +99,3 @@ else:
   store i32 %x, ptr %p
   ret void
 }
-
-declare void @llvm.amdgcn.endpgm()
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
index 51b273b909f6d..0cc3d4b995a8b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-ieee.ll
@@ -180,8 +180,6 @@ define amdgpu_ps void @ps_ieee_mode_off() #2 {
   ret void
 }
 
-declare float @llvm.minnum.f32(float, float) #3
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind "amdgpu-ieee"="true" }
 attributes #2 = { nounwind "amdgpu-ieee"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
index 77976e470fc78..352c6455ebaa9 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
@@ -205,18 +205,11 @@ define amdgpu_cs void @test_buffer_load_sgpr_or_imm_offset(<4 x i32> inreg %base
   ret void
 }
 
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32>, ptr addrspace(8) nocapture, i32, i32, i32 immarg) #1
-
-declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) nounwind readnone willreturn
-
 ; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.reloc.constant(metadata) #3
 
 ; Function Attrs: nounwind readnone speculatable
-declare i64 @llvm.amdgcn.s.getpc() #3
 
 ; Function Attrs: nounwind readnone
-declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32 immarg) #1
 
 attributes #0 = { argmemonly nounwind willreturn }
 attributes #1 = { nounwind memory(argmem: write) }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
index cab8e0b8baaa5..b5a0d72fe5715 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.bitcast.ll
@@ -288,8 +288,6 @@ define amdgpu_kernel void @v2i32_to_v4f16(ptr addrspace(1) %out, ptr addrspace(1
   ret void
 }
 
-declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32 immarg)
-
 ; CHECK-LABEL: {{^}}bitcast_v4f32_to_v2i64:
 ; CHECK: s_buffer_load_{{dwordx4|b128}}
 define <2 x i64> @bitcast_v4f32_to_v2i64(<2 x i64> %arg) {
@@ -299,8 +297,6 @@ define <2 x i64> @bitcast_v4f32_to_v2i64(<2 x i64> %arg) {
   ret <2 x i64> %div
 }
 
-declare half @llvm.canonicalize.f16(half)
-
 ; CHECK-LABEL: {{^}}bitcast_f32_to_v1i32:
 define amdgpu_kernel void @bitcast_f32_to_v1i32(ptr addrspace(1) %out) {
   %f16 = call arcp afn half @llvm.canonicalize.f16(half 0xH03F0)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
index a3f27ebddf9d6..fd25ec1f890fc 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll
@@ -6,9 +6,6 @@
 ; RUN: llc -mattr=-promote-alloca -verify-machineinstrs -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck --check-prefixes=GCN,GCN-ALLOCA %s
 
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
-
 ; Make sure we don't overwrite workitem information with private memory
 
 ; GCN-LABEL: {{^}}work_item_info:
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
index d9e0464942182..8bfc1ead7449b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fdiv.ll
@@ -4475,12 +4475,6 @@ define amdgpu_kernel void @fdiv_fpmath_f32_nosub_rhs(ptr addrspace(1) %out, floa
   ret void
 }
 
-declare float @llvm.sqrt.f32(float)
-declare float @llvm.fabs.f32(float)
-declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
-declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
-declare void @llvm.assume(i1 noundef)
-
 attributes #0 = { optnone noinline }
 
 !0 = !{float 2.500000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
index 5c40a4ce13e31..f55825f0b1f97 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-fold-binop-select.ll
@@ -508,6 +508,4 @@ define <2 x half> @multi_use_cast_regression(i1 %cond) {
   ret <2 x half> %call
 }
 
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll
index 210356d131350..1f43d60abfa44 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-i16-to-i32.ll
@@ -678,8 +678,6 @@ define amdgpu_kernel void @select_sle_i3(i3 %a, i3 %b) {
   store volatile i3 %sel, ptr addrspace(1) undef
   ret void
 }
-
-declare i3 @llvm.bitreverse.i3(i3)
 define amdgpu_kernel void @bitreverse_i3(i3 %a) {
 ; SI-LABEL: @bitreverse_i3(
 ; SI-NEXT:    [[BREV:%.*]] = call i3 @llvm.bitreverse.i3(i3 [[A:%.*]])
@@ -1436,8 +1434,6 @@ define amdgpu_kernel void @select_sle_i16(i16 %a, i16 %b) {
   ret void
 }
 
-declare i16 @llvm.bitreverse.i16(i16)
-
 define amdgpu_kernel void @bitreverse_i16(i16 %a) {
 ; SI-LABEL: @bitreverse_i16(
 ; SI-NEXT:    [[BREV:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[A:%.*]])
@@ -2133,8 +2129,6 @@ define amdgpu_kernel void @select_sle_3xi15(<3 x i15> %a, <3 x i15> %b) {
   store volatile <3 x i15> %sel, ptr addrspace(1) undef
   ret void
 }
-
-declare <3 x i15> @llvm.bitreverse.v3i15(<3 x i15>)
 define amdgpu_kernel void @bitreverse_3xi15(<3 x i15> %a) {
 ; SI-LABEL: @bitreverse_3xi15(
 ; SI-NEXT:    [[BREV:%.*]] = call <3 x i15> @llvm.bitreverse.v3i15(<3 x i15> [[A:%.*]])
@@ -2831,8 +2825,6 @@ define amdgpu_kernel void @select_sle_3xi16(<3 x i16> %a, <3 x i16> %b) {
   ret void
 }
 
-declare <3 x i16> @llvm.bitreverse.v3i16(<3 x i16>)
-
 define amdgpu_kernel void @bitreverse_3xi16(<3 x i16> %a) {
 ; SI-LABEL: @bitreverse_3xi16(
 ; SI-NEXT:    [[BREV:%.*]] = call <3 x i16> @llvm.bitreverse.v3i16(<3 x i16> [[A:%.*]])
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-sqrt.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-sqrt.ll
index 03c84d1193609..849edde57820f 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-sqrt.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-sqrt.ll
@@ -602,11 +602,6 @@ define amdgpu_kernel void @sqrt_fpmath_f32_assume_nosub(ptr addrspace(1) %out, f
   ret void
 }
 
-declare float @llvm.sqrt.f32(float)
-declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
-declare float @llvm.fabs.f32(float)
-declare void @llvm.assume(i1 noundef)
-
 attributes #0 = { optnone noinline }
 
 !0 = !{float 2.500000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll
index c92b78cd45573..335cf2b9430b7 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-cc.ll
@@ -925,12 +925,6 @@ define amdgpu_cs_chain void @amdgpu_cs_chain_dont_realign_stack(i32 %idx) {
   store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %gep0, align 32
   ret void
 }
-
-declare void @llvm.amdgcn.cs.chain.v2i32(ptr, i32, <2 x i32>, <2 x i32>, i32, ...)
-declare void @llvm.amdgcn.cs.chain.v3i32(ptr, i32, <3 x i32>, <3 x i32>, i32, ...)
-declare void @llvm.amdgcn.cs.chain.v4i32(ptr, i32, <4 x i32>, <4 x i32>, i32, ...)
 declare amdgpu_cs_chain void @chain_callee_2(<2 x i32> inreg, <2 x i32>)
 declare amdgpu_cs_chain void @chain_callee(<3 x i32> inreg, <3 x i32>)
 declare amdgpu_cs_chain void @chain_callee_4(<4 x i32> inreg, <4 x i32>)
-declare i32 @llvm.amdgcn.set.inactive(i32, i32)
-declare i32 @llvm.amdgcn.wwm(i32)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll
index 8d9ed9bb4343c..b5e41eb0fcd50 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-preserve-cc.ll
@@ -650,13 +650,6 @@ define amdgpu_cs_chain_preserve void @amdgpu_cs_chain_preserve_dont_realign_stac
   store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %gep0, align 32
   ret void
 }
-
-declare void @llvm.amdgcn.cs.chain.v3i32(ptr, i32, <3 x i32>, <3 x i32>, i32, ...)
 declare amdgpu_cs_chain_preserve void @chain_preserve_callee(<3 x i32> inreg, <3 x i32>)
 declare amdgpu_cs_chain void @chain_callee(<3 x i32> inreg, <3 x i32>)
-
-declare void @llvm.amdgcn.cs.chain.v2i32(ptr, i32, <2 x i32>, <2 x i32>, i32, ...)
 declare amdgpu_cs_chain_preserve void @chain_preserve_callee_2(<2 x i32> inreg, <2 x i32>)
-
-declare i32 @llvm.amdgcn.set.inactive(i32, i32)
-declare i32 @llvm.amdgcn.wwm(i32)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
index b34df3ffca264..a34783a3343cc 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-inline.ll
@@ -170,8 +170,6 @@ bb.2:
   store float %c, ptr addrspace(1) %a
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare float @_Z3sinf(float) #1
 declare void @forbid_sroa(ptr addrspace(5) nocapture %p)
 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll
index 9f5b6389ab59f..6c0704ab3d23f 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll
@@ -46,6 +46,5 @@ define i32 @f(i32 %x, i32 %y) {
 }
 
 ; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.x() #20
 
 !4 = !{i32 0, i32 1024}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-nsa-threshold.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-nsa-threshold.ll
index b8681a0a2ac65..f22051c210cd7 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-nsa-threshold.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-nsa-threshold.ll
@@ -276,9 +276,6 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readonly "amdgpu-nsa-threshold"="2" }
 attributes #3 = { nounwind readonly "amdgpu-nsa-threshold"="3" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
index c5dbfb0f219bd..fdfe2218c88ba 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-reloc-const.ll
@@ -22,10 +22,8 @@ define amdgpu_ps void @ps_main(i32 %arg, i32 inreg %arg1, i32 inreg %arg2) local
 }
 
 ; Function Attrs: inaccessiblememonly nounwind
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #1
 
 ; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.reloc.constant(metadata) #2
 
 attributes #0 = { nounwind }
 attributes #1 = { inaccessiblememonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
index c4bd4bc126f73..e0a127b22e4fe 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow.ll
@@ -21,14 +21,6 @@ declare <3 x half> @_Z3powDv3_DhS_(<3 x half>, <3 x half>)
 declare <4 x half> @_Z3powDv4_DhS_(<4 x half>, <4 x half>)
 declare <8 x half> @_Z3powDv8_DhS_(<8 x half>, <8 x half>)
 declare <16 x half> @_Z3powDv16_DhS_(<16 x half>, <16 x half>)
-declare void @llvm.assume(i1 noundef)
-declare float @llvm.floor.f32(float)
-declare float @llvm.ceil.f32(float)
-declare float @llvm.trunc.f32(float)
-declare float @llvm.rint.f32(float)
-declare float @llvm.nearbyint.f32(float)
-declare float @llvm.round.f32(float)
-declare float @llvm.roundeven.f32(float)
 
 define float @test_pow_fast_f32(float %x, float %y) {
 ; CHECK-LABEL: define float @test_pow_fast_f32
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
index 5c56276eeb0f1..a83f4f88392d3 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
@@ -1275,8 +1275,6 @@ entry:
   ret float %sin2
 }
 
-declare void @llvm.dbg.value(metadata, metadata, metadata) #0
-
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 attributes #1 = { argmemonly nounwind willreturn }
 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll
index cc116dfe807ec..6d93ea1813fd8 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll
@@ -143,16 +143,4 @@ entry:
   ret void
 }
 
-declare i32 @llvm.r600.read.ngroups.x() #0
-declare i32 @llvm.r600.read.ngroups.y() #0
-declare i32 @llvm.r600.read.ngroups.z() #0
-
-declare i32 @llvm.r600.read.global.size.x() #0
-declare i32 @llvm.r600.read.global.size.y() #0
-declare i32 @llvm.r600.read.global.size.z() #0
-
-declare i32 @llvm.r600.read.local.size.x() #0
-declare i32 @llvm.r600.read.local.size.y() #0
-declare i32 @llvm.r600.read.local.size.z() #0
-
 attributes #0 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-psenable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-psenable.ll
index d6322e2b4d3e3..799ce2b006e0b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-psenable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-msgpack-psenable.ll
@@ -17,9 +17,6 @@ define amdgpu_ps void @amdpal_psenable(i32 inreg, i32 inreg, i32 inreg, i32 inre
   ret void
 }
 
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
-
 attributes #6 = { nounwind "InitialPSInputAddr"="2" }
 
 ; Force MsgPack format metadata
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll
index 13d2050c491fb..3b7f2b07b7d70 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-psenable.ll
@@ -28,7 +28,4 @@ define amdgpu_ps void @amdpal_psenable(i32 inreg, i32 inreg, i32 inreg, i32 inre
   ret void
 }
 
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
-
 attributes #6 = { nounwind "InitialPSInputAddr"="2" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal.ll b/llvm/test/CodeGen/AMDGPU/amdpal.ll
index 0ec5aeb24b423..465b7b85b2273 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal.ll
@@ -78,8 +78,6 @@ entry:
 
 attributes #0 = { nounwind "amdgpu-git-ptr-high"="0x1234" }
 
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-
 
 ; PAL:         .amdgpu_pal_metadata
 ; PAL-NEXT: ---
diff --git a/llvm/test/CodeGen/AMDGPU/and-gcn.ll b/llvm/test/CodeGen/AMDGPU/and-gcn.ll
index 095c25d8436b5..1b3fa4441dbca 100644
--- a/llvm/test/CodeGen/AMDGPU/and-gcn.ll
+++ b/llvm/test/CodeGen/AMDGPU/and-gcn.ll
@@ -21,6 +21,4 @@ endif:
   ret void
 }
 
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/and.ll b/llvm/test/CodeGen/AMDGPU/and.ll
index d6137597293f6..ac3eced7a03df 100644
--- a/llvm/test/CodeGen/AMDGPU/and.ll
+++ b/llvm/test/CodeGen/AMDGPU/and.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global,-xnack -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 ; FUNC-LABEL: {{^}}test2:
 ; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
 ; EG: AND_INT {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
diff --git a/llvm/test/CodeGen/AMDGPU/andorbitset.ll b/llvm/test/CodeGen/AMDGPU/andorbitset.ll
index a189ba9b10342..e15c4f3835f18 100644
--- a/llvm/test/CodeGen/AMDGPU/andorbitset.ll
+++ b/llvm/test/CodeGen/AMDGPU/andorbitset.ll
@@ -73,6 +73,4 @@ bb6:
   unreachable
 }
 
-declare float @llvm.fabs.f32(float) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/andorn2.ll b/llvm/test/CodeGen/AMDGPU/andorn2.ll
index 3226a77bb9d34..8112f6fa2ca95 100644
--- a/llvm/test/CodeGen/AMDGPU/andorn2.ll
+++ b/llvm/test/CodeGen/AMDGPU/andorn2.ll
@@ -100,4 +100,3 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #0
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index af0eb23d8e991..02b73a033f598 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -4,29 +4,15 @@
 
 ; TODO: The test contains UB which is refined by the Attributor and should be removed.
 
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
-
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
-
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
-declare i64 @llvm.amdgcn.dispatch.id() #0
-
 define void @use_workitem_id_x() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_workitem_id_x
-; AKF_HSA-SAME: () #[[ATTR1:[0-9]+]] {
+; AKF_HSA-SAME: () #[[ATTR0:[0-9]+]] {
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; AKF_HSA-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) undef, align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_workitem_id_x
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR1:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR0:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) undef, align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -38,13 +24,13 @@ define void @use_workitem_id_x() #1 {
 
 define void @use_workitem_id_y() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_workitem_id_y
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; AKF_HSA-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) undef, align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_workitem_id_y
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR2:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR1:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) undef, align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -56,13 +42,13 @@ define void @use_workitem_id_y() #1 {
 
 define void @use_workitem_id_z() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_workitem_id_z
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; AKF_HSA-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) undef, align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_workitem_id_z
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR3:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR2:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) undef, align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -74,13 +60,13 @@ define void @use_workitem_id_z() #1 {
 
 define void @use_workgroup_id_x() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_x
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
 ; AKF_HSA-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) undef, align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_x
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR4:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR3:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) undef, align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -92,13 +78,13 @@ define void @use_workgroup_id_x() #1 {
 
 define void @use_workgroup_id_y() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_y
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; AKF_HSA-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) undef, align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_y
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR5:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR4:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) undef, align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -110,13 +96,13 @@ define void @use_workgroup_id_y() #1 {
 
 define void @use_workgroup_id_z() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_z
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
 ; AKF_HSA-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) undef, align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_z
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR6:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR5:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) undef, align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -128,13 +114,13 @@ define void @use_workgroup_id_z() #1 {
 
 define void @use_dispatch_ptr() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
 ; AKF_HSA-NEXT:    store volatile ptr addrspace(4) [[DISPATCH_PTR]], ptr addrspace(1) undef, align 8
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR7:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR6:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile ptr addrspace(4) [[DISPATCH_PTR]], ptr addrspace(1) undef, align 8
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -146,13 +132,13 @@ define void @use_dispatch_ptr() #1 {
 
 define void @use_queue_ptr() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_queue_ptr
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[QUEUE_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
 ; AKF_HSA-NEXT:    store volatile ptr addrspace(4) [[QUEUE_PTR]], ptr addrspace(1) undef, align 8
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_queue_ptr
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR8:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR7:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[QUEUE_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile ptr addrspace(4) [[QUEUE_PTR]], ptr addrspace(1) undef, align 8
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -164,13 +150,13 @@ define void @use_queue_ptr() #1 {
 
 define void @use_dispatch_id() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_dispatch_id
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = call i64 @llvm.amdgcn.dispatch.id()
 ; AKF_HSA-NEXT:    store volatile i64 [[VAL]], ptr addrspace(1) undef, align 8
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_dispatch_id
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR9:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR8:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = call i64 @llvm.amdgcn.dispatch.id()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i64 [[VAL]], ptr addrspace(1) undef, align 8
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -182,7 +168,7 @@ define void @use_dispatch_id() #1 {
 
 define void @use_workgroup_id_y_workgroup_id_z() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_y_workgroup_id_z
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; AKF_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
 ; AKF_HSA-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) undef, align 4
@@ -190,7 +176,7 @@ define void @use_workgroup_id_y_workgroup_id_z() #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_workgroup_id_y_workgroup_id_z
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR10:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR9:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) undef, align 4
@@ -206,12 +192,12 @@ define void @use_workgroup_id_y_workgroup_id_z() #1 {
 
 define void @func_indirect_use_workitem_id_x() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_x
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_workitem_id_x()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_x
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR0]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_workitem_id_x()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -221,12 +207,12 @@ define void @func_indirect_use_workitem_id_x() #1 {
 
 define void @kernel_indirect_use_workitem_id_x() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workitem_id_x
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_workitem_id_x()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workitem_id_x
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR0]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_workitem_id_x()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -236,12 +222,12 @@ define void @kernel_indirect_use_workitem_id_x() #1 {
 
 define void @func_indirect_use_workitem_id_y() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_y
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_workitem_id_y()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_y
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_workitem_id_y()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -251,12 +237,12 @@ define void @func_indirect_use_workitem_id_y() #1 {
 
 define void @func_indirect_use_workitem_id_z() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_z
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_workitem_id_z()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_z
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR3]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_workitem_id_z()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -266,12 +252,12 @@ define void @func_indirect_use_workitem_id_z() #1 {
 
 define void @func_indirect_use_workgroup_id_x() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_x
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_workgroup_id_x()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_x
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR4]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR3]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_workgroup_id_x()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -281,12 +267,12 @@ define void @func_indirect_use_workgroup_id_x() #1 {
 
 define void @kernel_indirect_use_workgroup_id_x() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workgroup_id_x
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_workgroup_id_x()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workgroup_id_x
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR4]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR3]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_workgroup_id_x()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -296,12 +282,12 @@ define void @kernel_indirect_use_workgroup_id_x() #1 {
 
 define void @func_indirect_use_workgroup_id_y() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_workgroup_id_y()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR4]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_workgroup_id_y()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -311,12 +297,12 @@ define void @func_indirect_use_workgroup_id_y() #1 {
 
 define void @func_indirect_use_workgroup_id_z() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_z
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_workgroup_id_z()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_z
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR6]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_workgroup_id_z()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -326,12 +312,12 @@ define void @func_indirect_use_workgroup_id_z() #1 {
 
 define void @func_indirect_indirect_use_workgroup_id_y() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_indirect_use_workgroup_id_y
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @func_indirect_use_workgroup_id_y()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_indirect_use_workgroup_id_y
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR4]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @func_indirect_use_workgroup_id_y()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -341,12 +327,12 @@ define void @func_indirect_indirect_use_workgroup_id_y() #1 {
 
 define void @indirect_x2_use_workgroup_id_y() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@indirect_x2_use_workgroup_id_y
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @func_indirect_indirect_use_workgroup_id_y()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_x2_use_workgroup_id_y
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR4]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @func_indirect_indirect_use_workgroup_id_y()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -356,12 +342,12 @@ define void @indirect_x2_use_workgroup_id_y() #1 {
 
 define void @func_indirect_use_dispatch_ptr() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_dispatch_ptr()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR7]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR6]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_dispatch_ptr()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -371,12 +357,12 @@ define void @func_indirect_use_dispatch_ptr() #1 {
 
 define void @func_indirect_use_queue_ptr() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_queue_ptr
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_queue_ptr()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_queue_ptr
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR7]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_queue_ptr()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -386,12 +372,12 @@ define void @func_indirect_use_queue_ptr() #1 {
 
 define void @func_indirect_use_dispatch_id() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_id
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_dispatch_id()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_id
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR9]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_dispatch_id()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -401,12 +387,12 @@ define void @func_indirect_use_dispatch_id() #1 {
 
 define void @func_indirect_use_workgroup_id_y_workgroup_id_z() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y_workgroup_id_z
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @func_indirect_use_workgroup_id_y_workgroup_id_z()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y_workgroup_id_z
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR11:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR10:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @func_indirect_use_workgroup_id_y_workgroup_id_z()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -416,14 +402,14 @@ define void @func_indirect_use_workgroup_id_y_workgroup_id_z() #1 {
 
 define void @recursive_use_workitem_id_y() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@recursive_use_workitem_id_y
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; AKF_HSA-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) undef, align 4
 ; AKF_HSA-NEXT:    call void @recursive_use_workitem_id_y()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@recursive_use_workitem_id_y
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 [[VAL]], ptr addrspace(1) undef, align 4
 ; ATTRIBUTOR_HSA-NEXT:    call void @recursive_use_workitem_id_y()
@@ -437,12 +423,12 @@ define void @recursive_use_workitem_id_y() #1 {
 
 define void @call_recursive_use_workitem_id_y() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@call_recursive_use_workitem_id_y
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @recursive_use_workitem_id_y()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@call_recursive_use_workitem_id_y
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @recursive_use_workitem_id_y()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -452,13 +438,13 @@ define void @call_recursive_use_workitem_id_y() #1 {
 
 define void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
-; AKF_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4)
 ; AKF_HSA-NEXT:    store volatile i32 0, ptr addrspace(4) [[STOF]], align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR11:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4)
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, ptr addrspace(4) [[STOF]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -471,13 +457,13 @@ define void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #1 {
 
 define void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) %ptr) #2 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9
-; AKF_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
+; AKF_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
 ; AKF_HSA-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4)
 ; AKF_HSA-NEXT:    store volatile i32 0, ptr addrspace(4) [[STOF]], align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR13:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4)
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, ptr addrspace(4) [[STOF]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -489,14 +475,14 @@ define void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) %ptr) #2 {
 
 define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(ptr addrspace(3) %ptr) #2 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9
-; AKF_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR2]] {
+; AKF_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] {
 ; AKF_HSA-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4)
 ; AKF_HSA-NEXT:    store volatile i32 0, ptr addrspace(4) [[STOF]], align 4
 ; AKF_HSA-NEXT:    call void @func_indirect_use_queue_ptr()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR14:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR13:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr addrspace(4)
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, ptr addrspace(4) [[STOF]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    call void @func_indirect_use_queue_ptr()
@@ -510,12 +496,12 @@ define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(ptr addrspace(3) %pt
 
 define void @indirect_use_group_to_flat_addrspacecast() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_group_to_flat_addrspacecast(ptr addrspace(3) null)
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR11]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_group_to_flat_addrspacecast(ptr addrspace(3) null)
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -525,12 +511,12 @@ define void @indirect_use_group_to_flat_addrspacecast() #1 {
 
 define void @indirect_use_group_to_flat_addrspacecast_gfx9() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_gfx9
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) null)
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_gfx9
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR11]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR10]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_group_to_flat_addrspacecast_gfx9(ptr addrspace(3) null)
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -540,12 +526,12 @@ define void @indirect_use_group_to_flat_addrspacecast_gfx9() #1 {
 
 define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(ptr addrspace(3) null)
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR7]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(ptr addrspace(3) null)
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -555,13 +541,13 @@ define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #1 {
 
 define void @use_kernarg_segment_ptr() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[KERNARG_SEGMENT_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
 ; AKF_HSA-NEXT:    store volatile ptr addrspace(4) [[KERNARG_SEGMENT_PTR]], ptr addrspace(1) undef, align 8
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR11]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR10]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[KERNARG_SEGMENT_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile ptr addrspace(4) [[KERNARG_SEGMENT_PTR]], ptr addrspace(1) undef, align 8
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -572,12 +558,12 @@ define void @use_kernarg_segment_ptr() #1 {
 }
 define void @func_indirect_use_kernarg_segment_ptr() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_kernarg_segment_ptr
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_kernarg_segment_ptr()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_kernarg_segment_ptr
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR11]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR10]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_kernarg_segment_ptr()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -587,13 +573,13 @@ define void @func_indirect_use_kernarg_segment_ptr() #1 {
 
 define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; AKF_HSA-NEXT:    store volatile ptr addrspace(4) [[IMPLICITARG_PTR]], ptr addrspace(1) undef, align 8
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR15:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR14:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile ptr addrspace(4) [[IMPLICITARG_PTR]], ptr addrspace(1) undef, align 8
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -605,13 +591,13 @@ define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
 
 define void @use_implicitarg_ptr() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_implicitarg_ptr
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; AKF_HSA-NEXT:    store volatile ptr addrspace(4) [[IMPLICITARG_PTR]], ptr addrspace(1) undef, align 8
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_implicitarg_ptr
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR11]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile ptr addrspace(4) [[IMPLICITARG_PTR]], ptr addrspace(1) undef, align 8
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -623,12 +609,12 @@ define void @use_implicitarg_ptr() #1 {
 
 define void @func_indirect_use_implicitarg_ptr() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    call void @use_implicitarg_ptr()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR12]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR11]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @use_implicitarg_ptr()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -641,11 +627,11 @@ declare void @external.func() #3
 ; This function gets deleted.
 define internal void @defined.func() #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@defined.func
-; AKF_HSA-SAME: () #[[ATTR3:[0-9]+]] {
+; AKF_HSA-SAME: () #[[ATTR2:[0-9]+]] {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@defined.func
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR16:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
   ret void
@@ -653,12 +639,12 @@ define internal void @defined.func() #3 {
 
 define void @func_call_external() #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_call_external
-; AKF_HSA-SAME: () #[[ATTR3]] {
+; AKF_HSA-SAME: () #[[ATTR2]] {
 ; AKF_HSA-NEXT:    call void @external.func()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_external
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR16:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR15:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @external.func()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -668,12 +654,12 @@ define void @func_call_external() #3 {
 
 define void @func_call_defined() #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_call_defined
-; AKF_HSA-SAME: () #[[ATTR3]] {
+; AKF_HSA-SAME: () #[[ATTR2]] {
 ; AKF_HSA-NEXT:    call void @defined.func()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_defined
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @defined.func()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -682,12 +668,12 @@ define void @func_call_defined() #3 {
 }
 define void @func_call_asm() #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_call_asm
-; AKF_HSA-SAME: () #[[ATTR3]] {
-; AKF_HSA-NEXT:    call void asm sideeffect "", ""() #[[ATTR3]]
+; AKF_HSA-SAME: () #[[ATTR2]] {
+; AKF_HSA-NEXT:    call void asm sideeffect "", ""() #[[ATTR2]]
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void asm sideeffect "", ""() #[[ATTR28:[0-9]+]]
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -697,12 +683,12 @@ define void @func_call_asm() #3 {
 
 define amdgpu_kernel void @kern_call_external() #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@kern_call_external
-; AKF_HSA-SAME: () #[[ATTR4:[0-9]+]] {
+; AKF_HSA-SAME: () #[[ATTR3:[0-9]+]] {
 ; AKF_HSA-NEXT:    call void @external.func()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_external
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @external.func()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -712,12 +698,12 @@ define amdgpu_kernel void @kern_call_external() #3 {
 
 define amdgpu_kernel void @func_kern_defined() #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_kern_defined
-; AKF_HSA-SAME: () #[[ATTR4]] {
+; AKF_HSA-SAME: () #[[ATTR3]] {
 ; AKF_HSA-NEXT:    call void @defined.func()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_kern_defined
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @defined.func()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -727,13 +713,13 @@ define amdgpu_kernel void @func_kern_defined() #3 {
 
 define i32 @use_dispatch_ptr_ret_type() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
 ; AKF_HSA-NEXT:    store volatile ptr addrspace(4) [[DISPATCH_PTR]], ptr addrspace(1) undef, align 8
 ; AKF_HSA-NEXT:    ret i32 0
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR7]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR6]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile ptr addrspace(4) [[DISPATCH_PTR]], ptr addrspace(1) undef, align 8
 ; ATTRIBUTOR_HSA-NEXT:    ret i32 0
@@ -745,13 +731,13 @@ define i32 @use_dispatch_ptr_ret_type() #1 {
 
 define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func
-; AKF_HSA-SAME: () #[[ATTR1]] {
+; AKF_HSA-SAME: () #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[F:%.*]] = call float @use_dispatch_ptr_ret_type()
 ; AKF_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
 ; AKF_HSA-NEXT:    ret float [[FADD]]
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR7]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR6]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[F:%.*]] = call float @use_dispatch_ptr_ret_type()
 ; ATTRIBUTOR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
 ; ATTRIBUTOR_HSA-NEXT:    ret float [[FADD]]
@@ -763,13 +749,13 @@ define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 {
 
 define float @func_indirect_call(ptr %fptr) #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_call
-; AKF_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR3]] {
+; AKF_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR2]] {
 ; AKF_HSA-NEXT:    [[F:%.*]] = call float [[FPTR]]()
 ; AKF_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
 ; AKF_HSA-NEXT:    ret float [[FADD]]
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_call
-; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] {
+; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR15]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[F:%.*]] = call float [[FPTR]]()
 ; ATTRIBUTOR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
 ; ATTRIBUTOR_HSA-NEXT:    ret float [[FADD]]
@@ -782,13 +768,13 @@ define float @func_indirect_call(ptr %fptr) #3 {
 declare float @extern() #3
 define float @func_extern_call() #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_extern_call
-; AKF_HSA-SAME: () #[[ATTR3]] {
+; AKF_HSA-SAME: () #[[ATTR2]] {
 ; AKF_HSA-NEXT:    [[F:%.*]] = call float @extern()
 ; AKF_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
 ; AKF_HSA-NEXT:    ret float [[FADD]]
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_extern_call
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR16]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR15]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[F:%.*]] = call float @extern()
 ; ATTRIBUTOR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
 ; ATTRIBUTOR_HSA-NEXT:    ret float [[FADD]]
@@ -800,13 +786,13 @@ define float @func_extern_call() #3 {
 
 define float @func_null_call(ptr %fptr) #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_null_call
-; AKF_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR3]] {
+; AKF_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR2]] {
 ; AKF_HSA-NEXT:    [[F:%.*]] = call float null()
 ; AKF_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
 ; AKF_HSA-NEXT:    ret float [[FADD]]
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_null_call
-; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR16]] {
+; ATTRIBUTOR_HSA-SAME: (ptr [[FPTR:%.*]]) #[[ATTR15]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[F:%.*]] = call float null()
 ; ATTRIBUTOR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
 ; ATTRIBUTOR_HSA-NEXT:    ret float [[FADD]]
@@ -816,18 +802,16 @@ define float @func_null_call(ptr %fptr) #3 {
   ret float %fadd
 }
 
-declare float @llvm.amdgcn.rcp.f32(float) #0
-
 ; Calls some other recognized intrinsic
 define float @func_other_intrinsic_call(float %arg) #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_other_intrinsic_call
-; AKF_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR3]] {
+; AKF_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR2]] {
 ; AKF_HSA-NEXT:    [[F:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[ARG]])
 ; AKF_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
 ; AKF_HSA-NEXT:    ret float [[FADD]]
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_other_intrinsic_call
-; ATTRIBUTOR_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR17]] {
+; ATTRIBUTOR_HSA-SAME: (float [[ARG:%.*]]) #[[ATTR16]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[F:%.*]] = call float @llvm.amdgcn.rcp.f32(float [[ARG]])
 ; ATTRIBUTOR_HSA-NEXT:    [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
 ; ATTRIBUTOR_HSA-NEXT:    ret float [[FADD]]
@@ -840,12 +824,12 @@ define float @func_other_intrinsic_call(float %arg) #3 {
 ; Hostcall needs to be enabled for sanitizers
 define amdgpu_kernel void @kern_sanitize_address() #4 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
-; AKF_HSA-SAME: () #[[ATTR5:[0-9]+]] {
+; AKF_HSA-SAME: () #[[ATTR4:[0-9]+]] {
 ; AKF_HSA-NEXT:    store volatile i32 0, ptr addrspace(1) null, align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, ptr addrspace(1) null, align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -856,12 +840,12 @@ define amdgpu_kernel void @kern_sanitize_address() #4 {
 ; Hostcall needs to be enabled for sanitizers
 define void @func_sanitize_address() #4 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
-; AKF_HSA-SAME: () #[[ATTR5]] {
+; AKF_HSA-SAME: () #[[ATTR4]] {
 ; AKF_HSA-NEXT:    store volatile i32 0, ptr addrspace(1) null, align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, ptr addrspace(1) null, align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -872,12 +856,12 @@ define void @func_sanitize_address() #4 {
 ; Hostcall needs to be enabled for sanitizers
 define void @func_indirect_sanitize_address() #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
-; AKF_HSA-SAME: () #[[ATTR3]] {
+; AKF_HSA-SAME: () #[[ATTR2]] {
 ; AKF_HSA-NEXT:    call void @func_sanitize_address()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @func_sanitize_address()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -888,12 +872,12 @@ define void @func_indirect_sanitize_address() #3 {
 ; Hostcall needs to be enabled for sanitizers
 define amdgpu_kernel void @kern_indirect_sanitize_address() #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
-; AKF_HSA-SAME: () #[[ATTR4]] {
+; AKF_HSA-SAME: () #[[ATTR3]] {
 ; AKF_HSA-NEXT:    call void @func_sanitize_address()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_indirect_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR22:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @func_sanitize_address()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -907,12 +891,12 @@ declare void @extern_func_sanitize_address() #5
 
 define amdgpu_kernel void @kern_decl_sanitize_address() #3 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@kern_decl_sanitize_address
-; AKF_HSA-SAME: () #[[ATTR4]] {
+; AKF_HSA-SAME: () #[[ATTR3]] {
 ; AKF_HSA-NEXT:    call void @extern_func_sanitize_address()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_decl_sanitize_address
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR18]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @extern_func_sanitize_address()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -924,11 +908,11 @@ declare void @enqueue_block_decl() #6
 
 define internal void @enqueue_block_def() #6 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@enqueue_block_def
-; AKF_HSA-SAME: () #[[ATTR7:[0-9]+]] {
+; AKF_HSA-SAME: () #[[ATTR6:[0-9]+]] {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@enqueue_block_def
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR25:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR24:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
   ret void
@@ -936,12 +920,12 @@ define internal void @enqueue_block_def() #6 {
 
 define amdgpu_kernel void @kern_call_enqueued_block_decl() {
 ; AKF_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl
-; AKF_HSA-SAME: () #[[ATTR8:[0-9]+]] {
+; AKF_HSA-SAME: () #[[ATTR7:[0-9]+]] {
 ; AKF_HSA-NEXT:    call void @enqueue_block_decl()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_decl
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR25:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @enqueue_block_decl()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -951,12 +935,12 @@ define amdgpu_kernel void @kern_call_enqueued_block_decl() {
 
 define amdgpu_kernel void @kern_call_enqueued_block_def() {
 ; AKF_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def
-; AKF_HSA-SAME: () #[[ATTR8]] {
+; AKF_HSA-SAME: () #[[ATTR7]] {
 ; AKF_HSA-NEXT:    call void @enqueue_block_def()
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_enqueued_block_def
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR26:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @enqueue_block_def()
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -969,7 +953,7 @@ define void @unused_enqueue_block() {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@unused_enqueue_block
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR26]] {
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
   ret void
@@ -980,7 +964,7 @@ define internal void @known_func() {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@known_func
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR26]] {
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
   ret void
@@ -989,12 +973,12 @@ define internal void @known_func() {
 ; Should never happen
 define amdgpu_kernel void @kern_callsite_enqueue_block() {
 ; AKF_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block
-; AKF_HSA-SAME: () #[[ATTR8]] {
-; AKF_HSA-NEXT:    call void @known_func() #[[ATTR7]]
+; AKF_HSA-SAME: () #[[ATTR7]] {
+; AKF_HSA-NEXT:    call void @known_func() #[[ATTR6]]
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_callsite_enqueue_block
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR27]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR26]] {
 ; ATTRIBUTOR_HSA-NEXT:    call void @known_func() #[[ATTR29:[0-9]+]]
 ; ATTRIBUTOR_HSA-NEXT:    ret void
 ;
@@ -1014,44 +998,44 @@ attributes #6 = { "enqueued-block" }
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
 ;.
-; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; AKF_HSA: attributes #[[ATTR1]] = { nounwind "target-cpu"="fiji" }
-; AKF_HSA: attributes #[[ATTR2]] = { nounwind "target-cpu"="gfx900" }
-; AKF_HSA: attributes #[[ATTR3]] = { nounwind }
-; AKF_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-calls" }
-; AKF_HSA: attributes #[[ATTR5]] = { nounwind sanitize_address }
-; AKF_HSA: attributes #[[ATTR6:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" }
-; AKF_HSA: attributes #[[ATTR7]] = { "enqueued-block" }
-; AKF_HSA: attributes #[[ATTR8]] = { "amdgpu-calls" }
+; AKF_HSA: attributes #[[ATTR0]] = { nounwind "target-cpu"="fiji" }
+; AKF_HSA: attributes #[[ATTR1]] = { nounwind "target-cpu"="gfx900" }
+; AKF_HSA: attributes #[[ATTR2]] = { nounwind }
+; AKF_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-calls" }
+; AKF_HSA: attributes #[[ATTR4]] = { nounwind sanitize_address }
+; AKF_HSA: attributes #[[ATTR5:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" }
+; AKF_HSA: attributes #[[ATTR6]] = { "enqueued-block" }
+; AKF_HSA: attributes #[[ATTR7]] = { "amdgpu-calls" }
+; AKF_HSA: attributes #[[ATTR8:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;.
-; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR23:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR24:[0-9]+]] = { "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR27]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR0]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind sanitize_address "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR22:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR23:[0-9]+]] = { "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "enqueued-block" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR26]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR27:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR28]] = { nounwind }
 ; ATTRIBUTOR_HSA: attributes #[[ATTR29]] = { "enqueued-block" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
index 9a9c28ac632f7..201629315a17e 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -4,25 +4,9 @@
 
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
 
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
-
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
-
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
-
-declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #2
-declare i1 @llvm.amdgcn.is.private(ptr nocapture) #2
-
 define amdgpu_kernel void @use_tgid_x(ptr addrspace(1) %ptr) #1 {
 ; HSA-LABEL: define {{[^@]+}}@use_tgid_x
-; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
 ; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
 ; HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; HSA-NEXT:    ret void
@@ -34,13 +18,13 @@ define amdgpu_kernel void @use_tgid_x(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tgid_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_tgid_y
-; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; AKF_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_tgid_y
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; ATTRIBUTOR_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -52,7 +36,7 @@ define amdgpu_kernel void @use_tgid_y(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @multi_use_tgid_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@multi_use_tgid_y
-; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; AKF_HSA-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
 ; AKF_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -60,7 +44,7 @@ define amdgpu_kernel void @multi_use_tgid_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@multi_use_tgid_y
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
@@ -76,7 +60,7 @@ define amdgpu_kernel void @multi_use_tgid_y(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tgid_x_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_tgid_x_y
-; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
 ; AKF_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; AKF_HSA-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -84,7 +68,7 @@ define amdgpu_kernel void @use_tgid_x_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_tgid_x_y
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -100,13 +84,13 @@ define amdgpu_kernel void @use_tgid_x_y(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tgid_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_tgid_z
-; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
 ; AKF_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_tgid_z
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
 ; ATTRIBUTOR_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -118,7 +102,7 @@ define amdgpu_kernel void @use_tgid_z(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tgid_x_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_tgid_x_z
-; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
 ; AKF_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
 ; AKF_HSA-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -126,7 +110,7 @@ define amdgpu_kernel void @use_tgid_x_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_tgid_x_z
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR2]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -142,7 +126,7 @@ define amdgpu_kernel void @use_tgid_x_z(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tgid_y_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_tgid_y_z
-; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; AKF_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
 ; AKF_HSA-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -150,7 +134,7 @@ define amdgpu_kernel void @use_tgid_y_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_tgid_y_z
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR4:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -166,7 +150,7 @@ define amdgpu_kernel void @use_tgid_y_z(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tgid_x_y_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_tgid_x_y_z
-; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
 ; AKF_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; AKF_HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
@@ -176,7 +160,7 @@ define amdgpu_kernel void @use_tgid_x_y_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_tgid_x_y_z
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR4]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
@@ -196,7 +180,7 @@ define amdgpu_kernel void @use_tgid_x_y_z(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tidig_x(ptr addrspace(1) %ptr) #1 {
 ; HSA-LABEL: define {{[^@]+}}@use_tidig_x
-; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; HSA-NEXT:    ret void
@@ -208,13 +192,13 @@ define amdgpu_kernel void @use_tidig_x(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tidig_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_tidig_y
-; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; AKF_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_tidig_y
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR5:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR4:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; ATTRIBUTOR_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -226,13 +210,13 @@ define amdgpu_kernel void @use_tidig_y(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tidig_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_tidig_z
-; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; AKF_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_tidig_z
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR6:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR5:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
 ; ATTRIBUTOR_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -244,7 +228,7 @@ define amdgpu_kernel void @use_tidig_z(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tidig_x_tgid_x(ptr addrspace(1) %ptr) #1 {
 ; HSA-LABEL: define {{[^@]+}}@use_tidig_x_tgid_x
-; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
 ; HSA-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -260,7 +244,7 @@ define amdgpu_kernel void @use_tidig_x_tgid_x(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tidig_y_tgid_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_tidig_y_tgid_y
-; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; AKF_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; AKF_HSA-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -268,7 +252,7 @@ define amdgpu_kernel void @use_tidig_y_tgid_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_tidig_y_tgid_y
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR7:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR6:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -284,7 +268,7 @@ define amdgpu_kernel void @use_tidig_y_tgid_y(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tidig_x_y_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_tidig_x_y_z
-; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; AKF_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; AKF_HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
@@ -294,7 +278,7 @@ define amdgpu_kernel void @use_tidig_x_y_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_tidig_x_y_z
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR8:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR7:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
@@ -314,7 +298,7 @@ define amdgpu_kernel void @use_tidig_x_y_z(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_all_workitems(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_all_workitems
-; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; AKF_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; AKF_HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
@@ -330,7 +314,7 @@ define amdgpu_kernel void @use_all_workitems(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_all_workitems
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR9:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR8:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
@@ -362,14 +346,14 @@ define amdgpu_kernel void @use_all_workitems(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_dispatch_ptr(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr
-; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(4) [[DISPATCH_PTR]], align 4
 ; AKF_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR10:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR9:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(4) [[DISPATCH_PTR]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
@@ -383,14 +367,14 @@ define amdgpu_kernel void @use_dispatch_ptr(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_queue_ptr(ptr addrspace(1) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_queue_ptr
-; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
 ; AKF_HSA-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(4) [[DISPATCH_PTR]], align 4
 ; AKF_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_queue_ptr
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR11:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR10:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
 ; ATTRIBUTOR_HSA-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(4) [[DISPATCH_PTR]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
@@ -404,7 +388,7 @@ define amdgpu_kernel void @use_queue_ptr(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_kernarg_segment_ptr(ptr addrspace(1) %ptr) #1 {
 ; HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
-; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; HSA-NEXT:    [[DISPATCH_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
 ; HSA-NEXT:    [[VAL:%.*]] = load i32, ptr addrspace(4) [[DISPATCH_PTR]], align 4
 ; HSA-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
@@ -418,13 +402,13 @@ define amdgpu_kernel void @use_kernarg_segment_ptr(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
-; AKF_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
 ; AKF_HSA-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(3) [[PTR:%.*]]) #[[ATTR11:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(3) [[PTR]] to ptr
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -436,13 +420,13 @@ define amdgpu_kernel void @use_group_to_flat_addrspacecast(ptr addrspace(3) %ptr
 
 define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast
-; AKF_HSA-SAME: (ptr addrspace(5) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
 ; AKF_HSA-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_private_to_flat_addrspacecast
-; ATTRIBUTOR_HSA-SAME: (ptr addrspace(5) [[PTR:%.*]]) #[[ATTR12]] {
+; ATTRIBUTOR_HSA-SAME: (ptr addrspace(5) [[PTR:%.*]]) #[[ATTR11]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(5) [[PTR]] to ptr
 ; ATTRIBUTOR_HSA-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -454,7 +438,7 @@ define amdgpu_kernel void @use_private_to_flat_addrspacecast(ptr addrspace(5) %p
 
 define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #1 {
 ; HSA-LABEL: define {{[^@]+}}@use_flat_to_group_addrspacecast
-; HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
 ; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(3)
 ; HSA-NEXT:    store volatile i32 0, ptr addrspace(3) [[FTOS]], align 4
 ; HSA-NEXT:    ret void
@@ -466,7 +450,7 @@ define amdgpu_kernel void @use_flat_to_group_addrspacecast(ptr %ptr) #1 {
 
 define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #1 {
 ; HSA-LABEL: define {{[^@]+}}@use_flat_to_private_addrspacecast
-; HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
 ; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(5)
 ; HSA-NEXT:    store volatile i32 0, ptr addrspace(5) [[FTOS]], align 4
 ; HSA-NEXT:    ret void
@@ -479,7 +463,7 @@ define amdgpu_kernel void @use_flat_to_private_addrspacecast(ptr %ptr) #1 {
 ; No-op addrspacecast should not use queue ptr
 define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %ptr) #1 {
 ; HSA-LABEL: define {{[^@]+}}@use_global_to_flat_addrspacecast
-; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; HSA-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(1) [[PTR]] to ptr
 ; HSA-NEXT:    store volatile i32 0, ptr [[STOF]], align 4
 ; HSA-NEXT:    ret void
@@ -491,7 +475,7 @@ define amdgpu_kernel void @use_global_to_flat_addrspacecast(ptr addrspace(1) %pt
 
 define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %ptr) #1 {
 ; HSA-LABEL: define {{[^@]+}}@use_constant_to_flat_addrspacecast
-; HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-SAME: (ptr addrspace(4) [[PTR:%.*]]) #[[ATTR0]] {
 ; HSA-NEXT:    [[STOF:%.*]] = addrspacecast ptr addrspace(4) [[PTR]] to ptr
 ; HSA-NEXT:    [[LD:%.*]] = load volatile i32, ptr [[STOF]], align 4
 ; HSA-NEXT:    ret void
@@ -503,7 +487,7 @@ define amdgpu_kernel void @use_constant_to_flat_addrspacecast(ptr addrspace(4) %
 
 define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #1 {
 ; HSA-LABEL: define {{[^@]+}}@use_flat_to_global_addrspacecast
-; HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
 ; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(1)
 ; HSA-NEXT:    store volatile i32 0, ptr addrspace(1) [[FTOS]], align 4
 ; HSA-NEXT:    ret void
@@ -515,7 +499,7 @@ define amdgpu_kernel void @use_flat_to_global_addrspacecast(ptr %ptr) #1 {
 
 define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #1 {
 ; HSA-LABEL: define {{[^@]+}}@use_flat_to_constant_addrspacecast
-; HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
+; HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
 ; HSA-NEXT:    [[FTOS:%.*]] = addrspacecast ptr [[PTR]] to ptr addrspace(4)
 ; HSA-NEXT:    [[LD:%.*]] = load volatile i32, ptr addrspace(4) [[FTOS]], align 4
 ; HSA-NEXT:    ret void
@@ -527,14 +511,14 @@ define amdgpu_kernel void @use_flat_to_constant_addrspacecast(ptr %ptr) #1 {
 
 define amdgpu_kernel void @use_is_shared(ptr %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_is_shared
-; AKF_HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[PTR]])
 ; AKF_HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_SHARED]] to i32
 ; AKF_HSA-NEXT:    store i32 [[EXT]], ptr addrspace(1) undef, align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_is_shared
-; ATTRIBUTOR_HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR12]] {
+; ATTRIBUTOR_HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR11]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[IS_SHARED:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[PTR]])
 ; ATTRIBUTOR_HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_SHARED]] to i32
 ; ATTRIBUTOR_HSA-NEXT:    store i32 [[EXT]], ptr addrspace(1) undef, align 4
@@ -548,14 +532,14 @@ define amdgpu_kernel void @use_is_shared(ptr %ptr) #1 {
 
 define amdgpu_kernel void @use_is_private(ptr %ptr) #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_is_private
-; AKF_HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_HSA-NEXT:    [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]])
 ; AKF_HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_PRIVATE]] to i32
 ; AKF_HSA-NEXT:    store i32 [[EXT]], ptr addrspace(1) undef, align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_is_private
-; ATTRIBUTOR_HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR12]] {
+; ATTRIBUTOR_HSA-SAME: (ptr [[PTR:%.*]]) #[[ATTR11]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[IS_PRIVATE:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[PTR]])
 ; ATTRIBUTOR_HSA-NEXT:    [[EXT:%.*]] = zext i1 [[IS_PRIVATE]] to i32
 ; ATTRIBUTOR_HSA-NEXT:    store i32 [[EXT]], ptr addrspace(1) undef, align 4
@@ -569,13 +553,13 @@ define amdgpu_kernel void @use_is_private(ptr %ptr) #1 {
 
 define amdgpu_kernel void @use_alloca() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_alloca
-; AKF_HSA-SAME: () #[[ATTR2:[0-9]+]] {
+; AKF_HSA-SAME: () #[[ATTR1:[0-9]+]] {
 ; AKF_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
 ; AKF_HSA-NEXT:    store i32 0, ptr addrspace(5) [[ALLOCA]], align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR0]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
 ; ATTRIBUTOR_HSA-NEXT:    store i32 0, ptr addrspace(5) [[ALLOCA]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -587,7 +571,7 @@ define amdgpu_kernel void @use_alloca() #1 {
 
 define amdgpu_kernel void @use_alloca_non_entry_block() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_alloca_non_entry_block
-; AKF_HSA-SAME: () #[[ATTR2]] {
+; AKF_HSA-SAME: () #[[ATTR1]] {
 ; AKF_HSA-NEXT:  entry:
 ; AKF_HSA-NEXT:    br label [[BB:%.*]]
 ; AKF_HSA:       bb:
@@ -596,7 +580,7 @@ define amdgpu_kernel void @use_alloca_non_entry_block() #1 {
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca_non_entry_block
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR0]] {
 ; ATTRIBUTOR_HSA-NEXT:  entry:
 ; ATTRIBUTOR_HSA-NEXT:    br label [[BB:%.*]]
 ; ATTRIBUTOR_HSA:       bb:
@@ -615,13 +599,13 @@ bb:
 
 define void @use_alloca_func() #1 {
 ; AKF_HSA-LABEL: define {{[^@]+}}@use_alloca_func
-; AKF_HSA-SAME: () #[[ATTR2]] {
+; AKF_HSA-SAME: () #[[ATTR1]] {
 ; AKF_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
 ; AKF_HSA-NEXT:    store i32 0, ptr addrspace(5) [[ALLOCA]], align 4
 ; AKF_HSA-NEXT:    ret void
 ;
 ; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_alloca_func
-; ATTRIBUTOR_HSA-SAME: () #[[ATTR13:[0-9]+]] {
+; ATTRIBUTOR_HSA-SAME: () #[[ATTR12:[0-9]+]] {
 ; ATTRIBUTOR_HSA-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
 ; ATTRIBUTOR_HSA-NEXT:    store i32 0, ptr addrspace(5) [[ALLOCA]], align 4
 ; ATTRIBUTOR_HSA-NEXT:    ret void
@@ -638,24 +622,24 @@ attributes #1 = { nounwind }
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
 
 ;.
-; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
-; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-stack-objects" }
+; AKF_HSA: attributes #[[ATTR0]] = { nounwind }
+; AKF_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-stack-objects" }
+; AKF_HSA: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;.
-; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR0]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR13:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;.
 ; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
index 6c5e58c740339..324aba0c11fce 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
@@ -2,21 +2,9 @@
 ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s
 ; RUN: opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
 
-declare i32 @llvm.r600.read.tgid.x() #0
-declare i32 @llvm.r600.read.tgid.y() #0
-declare i32 @llvm.r600.read.tgid.z() #0
-
-declare i32 @llvm.r600.read.tidig.x() #0
-declare i32 @llvm.r600.read.tidig.y() #0
-declare i32 @llvm.r600.read.tidig.z() #0
-
-declare i32 @llvm.r600.read.local.size.x() #0
-declare i32 @llvm.r600.read.local.size.y() #0
-declare i32 @llvm.r600.read.local.size.z() #0
-
 define amdgpu_kernel void @use_tgid_x(ptr addrspace(1) %ptr) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@use_tgid_x
-; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.x()
 ; CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; CHECK-NEXT:    ret void
@@ -28,13 +16,13 @@ define amdgpu_kernel void @use_tgid_x(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tgid_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-LABEL: define {{[^@]+}}@use_tgid_y
-; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.y()
 ; AKF_CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; AKF_CHECK-NEXT:    ret void
 ;
 ; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@use_tgid_y
-; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
+; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.y()
 ; ATTRIBUTOR_CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; ATTRIBUTOR_CHECK-NEXT:    ret void
@@ -46,7 +34,7 @@ define amdgpu_kernel void @use_tgid_y(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @multi_use_tgid_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-LABEL: define {{[^@]+}}@multi_use_tgid_y
-; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.y()
 ; AKF_CHECK-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
 ; AKF_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
@@ -54,7 +42,7 @@ define amdgpu_kernel void @multi_use_tgid_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-NEXT:    ret void
 ;
 ; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@multi_use_tgid_y
-; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.y()
 ; ATTRIBUTOR_CHECK-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
@@ -70,7 +58,7 @@ define amdgpu_kernel void @multi_use_tgid_y(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tgid_x_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-LABEL: define {{[^@]+}}@use_tgid_x_y
-; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x()
 ; AKF_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
 ; AKF_CHECK-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -78,7 +66,7 @@ define amdgpu_kernel void @use_tgid_x_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-NEXT:    ret void
 ;
 ; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@use_tgid_x_y
-; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR2]] {
+; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x()
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
 ; ATTRIBUTOR_CHECK-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -94,13 +82,13 @@ define amdgpu_kernel void @use_tgid_x_y(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tgid_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-LABEL: define {{[^@]+}}@use_tgid_z
-; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.z()
 ; AKF_CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; AKF_CHECK-NEXT:    ret void
 ;
 ; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@use_tgid_z
-; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
+; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.z()
 ; ATTRIBUTOR_CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; ATTRIBUTOR_CHECK-NEXT:    ret void
@@ -112,7 +100,7 @@ define amdgpu_kernel void @use_tgid_z(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tgid_x_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-LABEL: define {{[^@]+}}@use_tgid_x_z
-; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x()
 ; AKF_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.z()
 ; AKF_CHECK-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -120,7 +108,7 @@ define amdgpu_kernel void @use_tgid_x_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-NEXT:    ret void
 ;
 ; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@use_tgid_x_z
-; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR2]] {
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x()
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.z()
 ; ATTRIBUTOR_CHECK-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -136,7 +124,7 @@ define amdgpu_kernel void @use_tgid_x_z(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tgid_y_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-LABEL: define {{[^@]+}}@use_tgid_y_z
-; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.y()
 ; AKF_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.z()
 ; AKF_CHECK-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -144,7 +132,7 @@ define amdgpu_kernel void @use_tgid_y_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-NEXT:    ret void
 ;
 ; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@use_tgid_y_z
-; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR4:[0-9]+]] {
+; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.y()
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.z()
 ; ATTRIBUTOR_CHECK-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -160,7 +148,7 @@ define amdgpu_kernel void @use_tgid_y_z(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tgid_x_y_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-LABEL: define {{[^@]+}}@use_tgid_x_y_z
-; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x()
 ; AKF_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
 ; AKF_CHECK-NEXT:    [[VAL2:%.*]] = call i32 @llvm.r600.read.tgid.z()
@@ -170,7 +158,7 @@ define amdgpu_kernel void @use_tgid_x_y_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-NEXT:    ret void
 ;
 ; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@use_tgid_x_y_z
-; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR4]] {
+; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x()
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL2:%.*]] = call i32 @llvm.r600.read.tgid.z()
@@ -190,7 +178,7 @@ define amdgpu_kernel void @use_tgid_x_y_z(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tidig_x(ptr addrspace(1) %ptr) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@use_tidig_x
-; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.x()
 ; CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; CHECK-NEXT:    ret void
@@ -202,13 +190,13 @@ define amdgpu_kernel void @use_tidig_x(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tidig_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-LABEL: define {{[^@]+}}@use_tidig_y
-; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.y()
 ; AKF_CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; AKF_CHECK-NEXT:    ret void
 ;
 ; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@use_tidig_y
-; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR5:[0-9]+]] {
+; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR4:[0-9]+]] {
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.y()
 ; ATTRIBUTOR_CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; ATTRIBUTOR_CHECK-NEXT:    ret void
@@ -220,13 +208,13 @@ define amdgpu_kernel void @use_tidig_y(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tidig_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-LABEL: define {{[^@]+}}@use_tidig_z
-; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.z()
 ; AKF_CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; AKF_CHECK-NEXT:    ret void
 ;
 ; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@use_tidig_z
-; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR6:[0-9]+]] {
+; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR5:[0-9]+]] {
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.z()
 ; ATTRIBUTOR_CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; ATTRIBUTOR_CHECK-NEXT:    ret void
@@ -238,7 +226,7 @@ define amdgpu_kernel void @use_tidig_z(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tidig_x_tgid_x(ptr addrspace(1) %ptr) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@use_tidig_x_tgid_x
-; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x()
 ; CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.x()
 ; CHECK-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -254,7 +242,7 @@ define amdgpu_kernel void @use_tidig_x_tgid_x(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tidig_y_tgid_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-LABEL: define {{[^@]+}}@use_tidig_y_tgid_y
-; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.y()
 ; AKF_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
 ; AKF_CHECK-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -262,7 +250,7 @@ define amdgpu_kernel void @use_tidig_y_tgid_y(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-NEXT:    ret void
 ;
 ; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@use_tidig_y_tgid_y
-; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR7:[0-9]+]] {
+; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR6:[0-9]+]] {
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.y()
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
 ; ATTRIBUTOR_CHECK-NEXT:    store volatile i32 [[VAL0]], ptr addrspace(1) [[PTR]], align 4
@@ -278,7 +266,7 @@ define amdgpu_kernel void @use_tidig_y_tgid_y(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_tidig_x_y_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-LABEL: define {{[^@]+}}@use_tidig_x_y_z
-; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x()
 ; AKF_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tidig.y()
 ; AKF_CHECK-NEXT:    [[VAL2:%.*]] = call i32 @llvm.r600.read.tidig.z()
@@ -288,7 +276,7 @@ define amdgpu_kernel void @use_tidig_x_y_z(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-NEXT:    ret void
 ;
 ; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@use_tidig_x_y_z
-; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR8:[0-9]+]] {
+; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR7:[0-9]+]] {
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x()
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tidig.y()
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL2:%.*]] = call i32 @llvm.r600.read.tidig.z()
@@ -308,7 +296,7 @@ define amdgpu_kernel void @use_tidig_x_y_z(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_all_workitems(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-LABEL: define {{[^@]+}}@use_all_workitems
-; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; AKF_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; AKF_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x()
 ; AKF_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tidig.y()
 ; AKF_CHECK-NEXT:    [[VAL2:%.*]] = call i32 @llvm.r600.read.tidig.z()
@@ -324,7 +312,7 @@ define amdgpu_kernel void @use_all_workitems(ptr addrspace(1) %ptr) #1 {
 ; AKF_CHECK-NEXT:    ret void
 ;
 ; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@use_all_workitems
-; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR9:[0-9]+]] {
+; ATTRIBUTOR_CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR8:[0-9]+]] {
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x()
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.r600.read.tidig.y()
 ; ATTRIBUTOR_CHECK-NEXT:    [[VAL2:%.*]] = call i32 @llvm.r600.read.tidig.z()
@@ -356,7 +344,7 @@ define amdgpu_kernel void @use_all_workitems(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_get_local_size_x(ptr addrspace(1) %ptr) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@use_get_local_size_x
-; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.x()
 ; CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; CHECK-NEXT:    ret void
@@ -368,7 +356,7 @@ define amdgpu_kernel void @use_get_local_size_x(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_get_local_size_y(ptr addrspace(1) %ptr) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@use_get_local_size_y
-; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.y()
 ; CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; CHECK-NEXT:    ret void
@@ -380,7 +368,7 @@ define amdgpu_kernel void @use_get_local_size_y(ptr addrspace(1) %ptr) #1 {
 
 define amdgpu_kernel void @use_get_local_size_z(ptr addrspace(1) %ptr) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@use_get_local_size_z
-; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.z()
 ; CHECK-NEXT:    store i32 [[VAL]], ptr addrspace(1) [[PTR]], align 4
 ; CHECK-NEXT:    ret void
@@ -393,17 +381,18 @@ define amdgpu_kernel void @use_get_local_size_z(ptr addrspace(1) %ptr) #1 {
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
 
-; AKF_CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; AKF_CHECK: attributes #[[ATTR1]] = { nounwind }
 ;.
-; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; AKF_CHECK: attributes #[[ATTR0]] = { nounwind }
+; AKF_CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+;.
+; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/anyext.ll b/llvm/test/CodeGen/AMDGPU/anyext.ll
index 897e134ee48d8..5b57bde76e091 100644
--- a/llvm/test/CodeGen/AMDGPU/anyext.ll
+++ b/llvm/test/CodeGen/AMDGPU/anyext.ll
@@ -3,9 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
-
 define amdgpu_kernel void @anyext_i1_i32(ptr addrspace(1) %out, i32 %cond) #0 {
 ; GCN-LABEL: anyext_i1_i32:
 ; GCN:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
index d33196ba33109..d2a0f7b9a0874 100644
--- a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
@@ -1,10 +1,6 @@
 ; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=tahiti -mattr=-promote-alloca < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI %s
 ; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=tahiti -mattr=+promote-alloca < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI %s
 
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
-declare void @llvm.amdgcn.s.barrier() #2
-
 ; The required pointer calculations for the alloca'd actually requires
 ; an add and won't be folded into the addressing, which fails with a
 ; 64-bit pointer add. This should work since private pointers should
diff --git a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll
index a01dc0294ddc7..314a3367c7144 100644
--- a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i64.ll
@@ -1,8 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
-
 ; SI-LABEL: {{^}}test_array_ptr_calc:
 ; SI-DAG: v_mul_u32_u24
 ; SI-DAG: v_mul_hi_u32_u24
diff --git a/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll b/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
index f2794ff970050..0bdd5f5796539 100644
--- a/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
@@ -157,7 +157,5 @@ define amdgpu_kernel void @ashr_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/atomic-optimizer-strict-wqm.ll b/llvm/test/CodeGen/AMDGPU/atomic-optimizer-strict-wqm.ll
index e03c9ca34b825..46a18e021eae8 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic-optimizer-strict-wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic-optimizer-strict-wqm.ll
@@ -1,10 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefixes=GFX10
 
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32, ptr addrspace(8), i32, i32, i32 immarg)
-declare float @llvm.amdgcn.strict.wqm.f32(float)
-
 define amdgpu_ps void @main(i32 %arg) {
 ; GFX10-LABEL: main:
 ; GFX10:       ; %bb.0: ; %bb
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
index 624101dc12c5f..b4b159bcb46f8 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll
@@ -9,11 +9,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12W64 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-wavefrontsize64 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12W32 %s
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add(i32, ptr addrspace(8), i32, i32, i32 immarg)
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add(i32, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub(i32, ptr addrspace(8), i32, i32, i32 immarg)
-
 ; Show what the atomic optimization pass will do for raw buffers.
 
 define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace(8) %inout) {
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
index 8ee0ee3b27bae..0d19f6a256401 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll
@@ -9,8 +9,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -mattr=-flat-for-global -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX1264 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-wavefrontsize64 -mattr=-flat-for-global -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX1232 %s
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 ; Show what the atomic optimization pass will do for global pointers.
 
 define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace(1) %inout) {
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
index 352adacbda3e4..86539bdfcdeb4 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll
@@ -7,8 +7,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX1164 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX1132 %s
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 @local_var32 = addrspace(3) global i32 undef, align 4
 @local_var64 = addrspace(3) global i64 undef, align 8
 
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
index 1ebd864e7e03a..2e854e7c17afe 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_pixelshader.ll
@@ -7,10 +7,6 @@
 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1164 %s
 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -mattr=-flat-for-global - -amdgpu-atomic-optimizer-strategy=DPP -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1132 %s
 
-declare i1 @llvm.amdgcn.wqm.vote(i1)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add(i32, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-
 ; Show what the atomic optimization pass will do for raw buffers.
 
 define amdgpu_ps void @add_i32_constant(ptr addrspace(8) inreg %out, ptr addrspace(8) inreg %inout) {
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll
index ca94d68f01917..2005fc5c96c45 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll
@@ -9,10 +9,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12W64 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-wavefrontsize64 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12W32 %s
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add(i32, ptr addrspace(8), i32, i32, i32)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub(i32, ptr addrspace(8), i32, i32, i32)
-
 ; Show what the atomic optimization pass will do for raw buffers.
 
 define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace(8) %inout) {
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
index 7e15c07f95269..1e08e7469307c 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll
@@ -9,10 +9,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=-wavefrontsize32,+wavefrontsize64 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12W64 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,-wavefrontsize64 -amdgpu-atomic-optimizer-strategy=Iterative -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX12,GFX12W32 %s
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add(i32, ptr addrspace(8), i32, i32, i32, i32)
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.sub(i32, ptr addrspace(8), i32, i32, i32, i32)
-
 ; Show what the atomic optimization pass will do for struct buffers.
 
 define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace(8) %inout) {
diff --git a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
index 2c69ae58f0e61..84ae084c069a0 100644
--- a/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
@@ -2,10 +2,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s
 
-declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p3(ptr addrspace(3), i32)
-declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p1(ptr addrspace(1), i32)
-declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr, i32)
-
 define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32(ptr %addr, i32 %in) {
 ; GFX12-SDAG-LABEL: flat_atomic_cond_sub_no_rtn_u32:
 ; GFX12-SDAG:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
index ed045107d354d..26149c07874b1 100644
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll
@@ -118,13 +118,6 @@ define amdgpu_kernel void @max_10_sgprs() #0 {
 ;  ret void
 ;}
 
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
-declare i32 @llvm.amdgcn.workgroup.id.y() #1
-declare i32 @llvm.amdgcn.workgroup.id.z() #1
-declare i64 @llvm.amdgcn.dispatch.id() #1
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #1
-declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() #1
-
 attributes #0 = { nounwind "amdgpu-num-sgpr"="14" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "amdgpu-num-sgpr"="12" }
diff --git a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
index 632f7dbc53373..c4e79f3bf8ae1 100644
--- a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
+++ b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll
@@ -130,6 +130,4 @@ define void @back_off_barrier_with_fence(ptr %in, ptr %out) #0 {
   ret void
 }
 
-declare void @llvm.amdgcn.s.barrier()
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/barrier-elimination.ll b/llvm/test/CodeGen/AMDGPU/barrier-elimination.ll
index fed4d9b6a373f..d55b4ac1212ba 100644
--- a/llvm/test/CodeGen/AMDGPU/barrier-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/barrier-elimination.ll
@@ -24,8 +24,6 @@ define amdgpu_kernel void @flat_wgs_attr_32_64() #2 {
   ret void
 }
 
-declare void @llvm.amdgcn.s.barrier() #0
-
 attributes #0 = { convergent nounwind }
 attributes #1 = { nounwind "amdgpu-flat-work-group-size"="32,128" }
 attributes #2 = { nounwind "amdgpu-flat-work-group-size"="32,64" }
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 98658834e8978..8153ad5fe2883 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -16960,8 +16960,6 @@ define bfloat @v_fdiv_bf16(bfloat %a, bfloat %b) {
   ret bfloat %op
 }
 
-declare bfloat @llvm.fabs.bf16(bfloat)
-
 define bfloat @v_fabs_bf16(bfloat %a) {
 ; GCN-LABEL: v_fabs_bf16:
 ; GCN:       ; %bb.0:
@@ -17100,8 +17098,6 @@ define bfloat @v_fneg_bf16(bfloat %a) {
   ret bfloat %op
 }
 
-declare i32 @llvm.amdgcn.readfirstlane(i32)
-
 ; FIXME: readfirstlane hack for other bugs
 define amdgpu_ps i32 @s_fneg_bf16(bfloat inreg %a) {
 ; GCN-LABEL: s_fneg_bf16:
@@ -17264,14 +17260,6 @@ define amdgpu_ps i32 @s_fneg_fabs_bf16(bfloat inreg %a) {
   ret i32 %readlane
 }
 
-declare bfloat @llvm.minnum.bf16(bfloat, bfloat)
-declare <2 x bfloat> @llvm.minnum.v2bf16(<2 x bfloat>, <2 x bfloat>)
-declare <3 x bfloat> @llvm.minnum.v3bf16(<3 x bfloat>, <3 x bfloat>)
-declare <4 x bfloat> @llvm.minnum.v4bf16(<4 x bfloat>, <4 x bfloat>)
-declare <8 x bfloat> @llvm.minnum.v8bf16(<8 x bfloat>, <8 x bfloat>)
-declare <16 x bfloat> @llvm.minnum.v16bf16(<16 x bfloat>, <16 x bfloat>)
-declare <32 x bfloat> @llvm.minnum.v32bf16(<32 x bfloat>, <32 x bfloat>)
-
 define bfloat @v_minnum_bf16(bfloat %a, bfloat %b) {
 ; GCN-LABEL: v_minnum_bf16:
 ; GCN:       ; %bb.0:
@@ -20816,15 +20804,6 @@ define <32 x bfloat> @v_minnum_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b) {
   ret <32 x bfloat> %op
 }
 
-
-declare bfloat @llvm.maxnum.bf16(bfloat, bfloat)
-declare <2 x bfloat> @llvm.maxnum.v2bf16(<2 x bfloat>, <2 x bfloat>)
-declare <3 x bfloat> @llvm.maxnum.v3bf16(<3 x bfloat>, <3 x bfloat>)
-declare <4 x bfloat> @llvm.maxnum.v4bf16(<4 x bfloat>, <4 x bfloat>)
-declare <8 x bfloat> @llvm.maxnum.v8bf16(<8 x bfloat>, <8 x bfloat>)
-declare <16 x bfloat> @llvm.maxnum.v16bf16(<16 x bfloat>, <16 x bfloat>)
-declare <32 x bfloat> @llvm.maxnum.v32bf16(<32 x bfloat>, <32 x bfloat>)
-
 define bfloat @v_maxnum_bf16(bfloat %a, bfloat %b) {
 ; GCN-LABEL: v_maxnum_bf16:
 ; GCN:       ; %bb.0:
@@ -24369,8 +24348,6 @@ define <32 x bfloat> @v_maxnum_v32bf16(<32 x bfloat> %a, <32 x bfloat> %b) {
   ret <32 x bfloat> %op
 }
 
-declare bfloat @llvm.sqrt.bf16(bfloat)
-
 define bfloat @v_sqrt_bf16(bfloat %a) {
 ; GCN-LABEL: v_sqrt_bf16:
 ; GCN:       ; %bb.0:
@@ -24554,8 +24531,6 @@ define bfloat @v_sqrt_bf16(bfloat %a) {
   ret bfloat %op
 }
 
-declare bfloat @llvm.ldexp.bf16.i32(bfloat, i32)
-
 define bfloat @v_ldexp_bf16_i32(bfloat %a, i32 %b) {
 ; GCN-LABEL: v_ldexp_bf16_i32:
 ; GCN:       ; %bb.0:
@@ -24635,8 +24610,6 @@ define bfloat @v_ldexp_bf16_i32(bfloat %a, i32 %b) {
   ret bfloat %op
 }
 
-declare { bfloat, i16 } @llvm.frexp.bf16.i16(bfloat)
-
 define { bfloat, i16 } @v_frexp_bf16_i16(bfloat %a) {
 ; GCN-LABEL: v_frexp_bf16_i16:
 ; GCN:       ; %bb.0:
@@ -24709,11 +24682,6 @@ define { bfloat, i16 } @v_frexp_bf16_i16(bfloat %a) {
   ret { bfloat, i16 } %op
 }
 
-
-declare bfloat @llvm.log.bf16(bfloat)
-declare bfloat @llvm.log2.bf16(bfloat)
-declare bfloat @llvm.log10.bf16(bfloat)
-
 define bfloat @v_log_bf16(bfloat %a) {
 ; GCN-LABEL: v_log_bf16:
 ; GCN:       ; %bb.0:
@@ -25195,10 +25163,6 @@ define bfloat @v_log10_bf16(bfloat %a) {
   ret bfloat %op
 }
 
-declare bfloat @llvm.exp.bf16(bfloat)
-declare bfloat @llvm.exp2.bf16(bfloat)
-declare bfloat @llvm.exp10.bf16(bfloat)
-
 define bfloat @v_exp_bf16(bfloat %a) {
 ; GCN-LABEL: v_exp_bf16:
 ; GCN:       ; %bb.0:
@@ -25680,8 +25644,6 @@ define bfloat @v_exp10_bf16(bfloat %a) {
   ret bfloat %op
 }
 
-declare bfloat @llvm.ceil.bf16(bfloat)
-
 define bfloat @v_ceil_bf16(bfloat %a) {
 ; GCN-LABEL: v_ceil_bf16:
 ; GCN:       ; %bb.0:
@@ -25761,8 +25723,6 @@ define bfloat @v_ceil_bf16(bfloat %a) {
   ret bfloat %op
 }
 
-declare bfloat @llvm.trunc.bf16(bfloat)
-
 define bfloat @v_trunc_bf16(bfloat %a) {
 ; GCN-LABEL: v_trunc_bf16:
 ; GCN:       ; %bb.0:
@@ -25842,8 +25802,6 @@ define bfloat @v_trunc_bf16(bfloat %a) {
   ret bfloat %op
 }
 
-declare bfloat @llvm.rint.bf16(bfloat)
-
 define bfloat @v_rint_bf16(bfloat %a) {
 ; GCN-LABEL: v_rint_bf16:
 ; GCN:       ; %bb.0:
@@ -25923,8 +25881,6 @@ define bfloat @v_rint_bf16(bfloat %a) {
   ret bfloat %op
 }
 
-declare bfloat @llvm.nearbyint.bf16(bfloat)
-
 define bfloat @v_nearbyint_bf16(bfloat %a) {
 ; GCN-LABEL: v_nearbyint_bf16:
 ; GCN:       ; %bb.0:
@@ -26004,8 +25960,6 @@ define bfloat @v_nearbyint_bf16(bfloat %a) {
   ret bfloat %op
 }
 
-declare bfloat @llvm.round.bf16(bfloat)
-
 define bfloat @v_round_bf16(bfloat %a) {
 ; GCN-LABEL: v_round_bf16:
 ; GCN:       ; %bb.0:
@@ -26121,8 +26075,6 @@ define bfloat @v_round_bf16(bfloat %a) {
   ret bfloat %op
 }
 
-declare bfloat @llvm.roundeven.bf16(bfloat)
-
 define bfloat @v_roundeven_bf16(bfloat %a) {
 ; GCN-LABEL: v_roundeven_bf16:
 ; GCN:       ; %bb.0:
@@ -26202,8 +26154,6 @@ define bfloat @v_roundeven_bf16(bfloat %a) {
   ret bfloat %op
 }
 
-declare bfloat @llvm.floor.bf16(bfloat)
-
 define bfloat @v_floor_bf16(bfloat %a) {
 ; GCN-LABEL: v_floor_bf16:
 ; GCN:       ; %bb.0:
@@ -26283,8 +26233,6 @@ define bfloat @v_floor_bf16(bfloat %a) {
   ret bfloat %op
 }
 
-declare bfloat @llvm.canonicalize.bf16(bfloat)
-
 define bfloat @v_canonicalize_bf16(bfloat %a) {
 ; GCN-LABEL: v_canonicalize_bf16:
 ; GCN:       ; %bb.0:
@@ -26362,8 +26310,6 @@ define bfloat @v_canonicalize_bf16(bfloat %a) {
   ret bfloat %op
 }
 
-declare bfloat @llvm.arithmetic.fence.bf16(bfloat)
-
 ; FIXME: Promotion broken
 ; define bfloat @v_arithmetic_fence_bf16(bfloat %a) {
 ;   %op = call bfloat @llvm.arithmetic.fence.bf16(bfloat %a)
@@ -27269,8 +27215,6 @@ define i1 @v_fcmp_true_bf16(bfloat %a, bfloat %b) {
   ret i1 %op
 }
 
-declare bfloat @llvm.copysign.bf16(bfloat, bfloat)
-
 define bfloat @v_copysign_bf16_bf16(bfloat %mag, bfloat %sign) {
 ; GCN-LABEL: v_copysign_bf16_bf16:
 ; GCN:       ; %bb.0:
@@ -27919,8 +27863,6 @@ define amdgpu_ps i32 @s_copysign_bf16_f16(bfloat inreg %mag, half inreg %sign.f1
   ret i32 %readlane
 }
 
-declare float @llvm.copysign.f32(float, float)
-
 define float @v_copysign_f32_bf16(float %mag, bfloat %sign.bf16) {
 ; GCN-LABEL: v_copysign_f32_bf16:
 ; GCN:       ; %bb.0:
@@ -28029,8 +27971,6 @@ define amdgpu_ps i32 @s_copysign_f32_bf16(float inreg %mag, bfloat inreg %sign.b
   ret i32 %readlane
 }
 
-declare half @llvm.copysign.f16(half, half)
-
 define half @v_copysign_f16_bf16(half %mag, bfloat %sign.bf16) {
 ; GCN-LABEL: v_copysign_f16_bf16:
 ; GCN:       ; %bb.0:
@@ -28158,8 +28098,6 @@ define amdgpu_ps i32 @s_copysign_f16_bf16(half inreg %mag, bfloat inreg %sign.bf
   ret i32 %readlane
 }
 
-declare double @llvm.copysign.f64(double, double)
-
 define double @v_copysign_f64_bf16(double %mag, bfloat %sign.bf16) {
 ; GCN-LABEL: v_copysign_f64_bf16:
 ; GCN:       ; %bb.0:
@@ -38712,11 +38650,6 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
   ret <32 x bfloat> %op
 }
 
-declare bfloat @llvm.fma.bf16(bfloat, bfloat, bfloat)
-declare <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat>, <2 x bfloat>, <2 x bfloat>)
-declare <3 x bfloat> @llvm.fma.v3bf16(<3 x bfloat>, <3 x bfloat>, <3 x bfloat>)
-declare <4 x bfloat> @llvm.fma.v4bf16(<4 x bfloat>, <4 x bfloat>, <4 x bfloat>)
-
 define bfloat @v_fma_bf16(bfloat %a, bfloat %b, bfloat %c) {
 ; GCN-LABEL: v_fma_bf16:
 ; GCN:       ; %bb.0:
@@ -39397,11 +39330,6 @@ define <4 x bfloat> @v_fma_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat>
   ret <4 x bfloat> %op
 }
 
-declare bfloat @llvm.fmuladd.bf16(bfloat, bfloat, bfloat)
-declare <2 x bfloat> @llvm.fmuladd.v2bf16(<2 x bfloat>, <2 x bfloat>, <2 x bfloat>)
-declare <3 x bfloat> @llvm.fmuladd.v3bf16(<3 x bfloat>, <3 x bfloat>, <3 x bfloat>)
-declare <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat>, <4 x bfloat>, <4 x bfloat>)
-
 define bfloat @v_fmuladd_bf16(bfloat %a, bfloat %b, bfloat %c) {
 ; GCN-LABEL: v_fmuladd_bf16:
 ; GCN:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/bfe-combine.ll b/llvm/test/CodeGen/AMDGPU/bfe-combine.ll
index 0f20ed1320dad..d7815b351a497 100644
--- a/llvm/test/CodeGen/AMDGPU/bfe-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfe-combine.ll
@@ -137,5 +137,3 @@ define amdgpu_kernel void @bfe_combine16(ptr addrspace(1) nocapture %arg, i32 %x
   store i32 %val, ptr addrspace(1) %arg, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
diff --git a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
index af4116bd6aae5..3abde25a8f2ac 100644
--- a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
@@ -534,7 +534,5 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_toosmall_i32(ptr addrspace(1) %out,
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/big_alu.ll b/llvm/test/CodeGen/AMDGPU/big_alu.ll
index 0daa14a63f21a..b0b87abe1e4e1 100644
--- a/llvm/test/CodeGen/AMDGPU/big_alu.ll
+++ b/llvm/test/CodeGen/AMDGPU/big_alu.ll
@@ -1297,14 +1297,5 @@ ENDIF178:                                         ; preds = %IF179, %ENDIF175
   ret void
 }
 
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
-declare float @llvm.r600.recipsqrt.clamped.f32(float) #1
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.exp2.f32(float) #1
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/bitcast-v4f16-v4i16.ll b/llvm/test/CodeGen/AMDGPU/bitcast-v4f16-v4i16.ll
index 58f062ba778bd..cd2e5934b037b 100644
--- a/llvm/test/CodeGen/AMDGPU/bitcast-v4f16-v4i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitcast-v4f16-v4i16.ll
@@ -27,9 +27,5 @@ define amdgpu_ps void @test_to_half(ptr addrspace(8) inreg, <4 x i16> inreg) #0
   ret void
 }
 
-declare <4 x half> @llvm.amdgcn.wqm.v4f16(<4 x half>) #1
-declare <4 x i16> @llvm.amdgcn.wqm.v4i16(<4 x i16>) #1
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32) #0
-
 attributes #0 = { nounwind memory(argmem: write) }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
index 80732d5de1e20..df7150becbf51 100644
--- a/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitcast-vector-extract.ll
@@ -87,7 +87,5 @@ define amdgpu_kernel void @store_value_lowered_to_undef_bitcast_source_extractel
   ret void
 }
 
-declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/bitreverse.ll b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
index 64555f14a55cc..c0b0dc839e738 100644
--- a/llvm/test/CodeGen/AMDGPU/bitreverse.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitreverse.ll
@@ -6,18 +6,6 @@
 ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=GFX11-FLAT
 ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -global-isel -verify-machineinstrs | FileCheck %s --check-prefix=GFX11-GISEL
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-declare i16 @llvm.bitreverse.i16(i16) #1
-declare i32 @llvm.bitreverse.i32(i32) #1
-declare i64 @llvm.bitreverse.i64(i64) #1
-
-declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) #1
-declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) #1
-
-declare <2 x i64> @llvm.bitreverse.v2i64(<2 x i64>) #1
-declare <4 x i64> @llvm.bitreverse.v4i64(<4 x i64>) #1
-
 define amdgpu_kernel void @s_brev_i16(ptr addrspace(1) noalias %out, i16 %val) #0 {
 ; SI-LABEL: s_brev_i16:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index 384715a849c1e..a3dff40bbf255 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -1329,7 +1329,5 @@ bb196:
 
 declare void @f2(i64)
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
index 6201d7341898f..4ab32d80065c8 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-spill.ll
@@ -1937,6 +1937,4 @@ bb3:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir b/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir
index b427b011f5051..8cea2cd6053e1 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation-debug-info.mir
@@ -47,9 +47,6 @@
     ret void
   }
 
-  declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #2
-  declare void @llvm.dbg.value(metadata, metadata, metadata) #0
-
   attributes #0 = { nounwind readnone speculatable willreturn }
   attributes #1 = { nounwind writeonly }
   attributes #2 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx10-branch-offset-bug.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx10-branch-offset-bug.ll
index 903bc85ed6616..2034dcc284f7e 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx10-branch-offset-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation-gfx10-branch-offset-bug.ll
@@ -99,5 +99,3 @@ bb3:
   store volatile i32 %cnd0, ptr addrspace(1) %arg
   ret void
 }
-
-declare void @llvm.amdgcn.s.sleep(i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
index 2f637df4e9302..8bd39257ab6cc 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -14,9 +14,6 @@
 
 ; Used to emit an always 4 byte instruction. Inline asm always assumes
 ; each instruction is the maximum size.
-declare void @llvm.amdgcn.s.sleep(i32) #0
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 
 define amdgpu_kernel void @uniform_conditional_max_short_forward_branch(ptr addrspace(1) %arg, i32 %cnd) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/bswap.ll b/llvm/test/CodeGen/AMDGPU/bswap.ll
index e4c7df385d861..c3916d4876d34 100644
--- a/llvm/test/CodeGen/AMDGPU/bswap.ll
+++ b/llvm/test/CodeGen/AMDGPU/bswap.ll
@@ -3,19 +3,6 @@
 ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI
 ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=GFX11
 
-declare i16 @llvm.bswap.i16(i16) nounwind readnone
-declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>) nounwind readnone
-declare <3 x i16> @llvm.bswap.v3i16(<3 x i16>) nounwind readnone
-declare <4 x i16> @llvm.bswap.v4i16(<4 x i16>) nounwind readnone
-declare i32 @llvm.bswap.i32(i32) nounwind readnone
-declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone
-declare <8 x i32> @llvm.bswap.v8i32(<8 x i32>) nounwind readnone
-declare i64 @llvm.bswap.i64(i64) nounwind readnone
-declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) nounwind readnone
-declare <4 x i64> @llvm.bswap.v4i64(<4 x i64>) nounwind readnone
-declare i48 @llvm.bswap.i48(i48) #1
-
 define amdgpu_kernel void @test_bswap_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
 ; SI-LABEL: test_bswap_i32:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll
index 41eb2b7bb2748..4671190387f8c 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-no-rtn.ll
@@ -327,9 +327,3 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr
   %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret void
 }
-
-declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg)
-declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
-
-declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll
index f964da2ddf402..32a6c68ce979b 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f32-rtn.ll
@@ -342,9 +342,3 @@ define amdgpu_ps float @buffer_ptr_atomic_fadd_f32_bothen_rtn(float %val, ptr ad
   %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret float %ret
 }
-
-declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg)
-declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
-
-declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll
index 89a29526320c1..9fdfcb2404243 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.f64.ll
@@ -437,9 +437,3 @@ define amdgpu_ps double @buffer_ptr_atomic_fadd_f64_bothen_rtn(double %val, ptr
   %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0)
   ret double %ret
 }
-
-declare double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
-
-declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
-declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll
index 0b62977613f1d..dee7ea856224f 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-no-rtn.ll
@@ -326,9 +326,3 @@ define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %va
   %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret void
 }
-
-declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
-
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll
index 8c33683c0f5e0..1c2999df54a12 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-atomic-fadd.v2f16-rtn.ll
@@ -187,9 +187,3 @@ define amdgpu_ps <2 x half> @buffer_ptr_atomic_fadd_v2f16_bothen_rtn(<2 x half>
   %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
   ret <2 x half> %ret
 }
-
-declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
-
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
index f4cd19a2ffa80..b76d8eb110c1a 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-intrinsics-mmo-offsets.ll
@@ -592,41 +592,6 @@ bb.0:
   ret void
 }
 
-declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
-declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
-declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #2
-declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1) #2
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) #2
-declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32) #2
-declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #2
-declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #3
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32) #3
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32) #5
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32) #5
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) #4
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) #4
-
-
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32) #2
-declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32) #2
-declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #2
-declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #2
-
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #3
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #3
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #5
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32, i32) #5
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) #4
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) #4
-
 
 attributes #0 = { nounwind readonly }
 attributes #1 = { nounwind writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll b/llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll
index 16f29cc329976..03467b412c2ac 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-rsrc-ptr-ops.ll
@@ -108,6 +108,3 @@ main_body:
   store %fat_buffer_struct %arg, ptr addrspace(1) %dest.next
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-schedule.ll b/llvm/test/CodeGen/AMDGPU/buffer-schedule.ll
index bdc73e5b99720..9e4021ffb5a32 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-schedule.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-schedule.ll
@@ -83,19 +83,6 @@ define amdgpu_cs void @test1_ptrs_reorderable(ptr addrspace(8) inreg %buf, i32 %
   ret void
 }
 
-
-declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #2
-
-declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #3
-
-declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) #2
-
-declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) #3
-
-declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) nocapture, i32, i32, i32) #4
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8) nocapture, i32, i32, i32) #5
-
 attributes #2 = { nounwind readonly }
 attributes #3 = { nounwind writeonly }
 attributes #4 = { nounwind memory(argmem: read) }
diff --git a/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll b/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
index f6fc69a6e3e47..e1dbbeebb8cd1 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-cselect-b64.ll
@@ -30,6 +30,3 @@ bb:
   call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> %i6, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0)
   ret <2 x i32> %i6
 }
-
-declare <2 x i32> @llvm.amdgcn.raw.buffer.load.v2i32(<4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll b/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
index 98590c227859b..cabd267dfffc6 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-deadlanes.ll
@@ -81,10 +81,4 @@ bb1897:                                           ; preds = %bb1789, %bb1787
   ret void
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg)
-declare float @llvm.amdgcn.fmed3.f32(float, float, float)
-declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg)
-declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
-declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg)
-
 attributes #0 = { "target-features"=",+wavefrontsize64,+cumode" }
diff --git a/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll b/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll
index 82808cd309227..edb04383faac4 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-sdag-emitcopyfromreg.ll
@@ -132,5 +132,3 @@ bb21:
   store float %i15, ptr %ptr, align 4
   ret void
 }
-
-declare float @llvm.fabs.f32(float)
diff --git a/llvm/test/CodeGen/AMDGPU/bug-vopc-commute.ll b/llvm/test/CodeGen/AMDGPU/bug-vopc-commute.ll
index 0784d13e588d4..819a9aaefaf63 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-vopc-commute.ll
+++ b/llvm/test/CodeGen/AMDGPU/bug-vopc-commute.ll
@@ -41,7 +41,6 @@ ENDIF62:                                          ; preds = %ENDIF59
 }
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #0
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
index 1f0e09371d6d5..c679e36127499 100644
--- a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
+++ b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
@@ -42,9 +42,6 @@ bb1:
 }
 declare hidden void @external_void_func_void() #0
 
-declare void @llvm.lifetime.start.p5(i64, ptr addrspace(5) nocapture) #3
-declare void @llvm.lifetime.end.p5(i64, ptr addrspace(5) nocapture) #3
-
 attributes #0 = { nounwind }
 attributes #1 = { noinline norecurse nounwind }
 attributes #2 = { nounwind norecurse "frame-pointer"="all" }
diff --git a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
index f1992d71eb1de..bb9565dd199d0 100644
--- a/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-constexpr.ll
@@ -107,8 +107,6 @@ define hidden i32 @ident_i32(i32 %i) #0 {
   ret i32 %i
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
 attributes #0 = { nounwind noinline }
 attributes #1 = { alwaysinline nounwind }
 attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/call-skip.ll b/llvm/test/CodeGen/AMDGPU/call-skip.ll
index ea2bba1673a0b..e42de76bb2e18 100644
--- a/llvm/test/CodeGen/AMDGPU/call-skip.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-skip.ll
@@ -57,8 +57,6 @@ end:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind noinline }
 attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
index b711542be5a7f..be3882f69f8b8 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs-fixed-abi.ll
@@ -568,15 +568,6 @@ define hidden void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
   ret void
 }
 
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
-declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
-declare noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
-declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
-declare i64 @llvm.amdgcn.dispatch.id() #0
-declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind noinline }
 attributes #2 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="0" }
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
index 1d2523d364e55..cc4b9b3a8ffc6 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs-packed.ll
@@ -756,9 +756,5 @@ define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz()
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
-
 attributes #0 = { nounwind readnone speculatable "amdgpu-flat-work-group-size"="1,512" }
 attributes #1 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
index 5e6f377da28e1..88a7e061ffb22 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-vgprs.ll
@@ -804,10 +804,6 @@ define amdgpu_kernel void @kern_callsite_workitem_id_hints() #2 {
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
-
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind noinline }
 attributes #2 = { nounwind "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
index 927e45f029488..450d3306cd170 100644
--- a/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
+++ b/llvm/test/CodeGen/AMDGPU/captured-frame-index.ll
@@ -190,8 +190,5 @@ entry:
   ret void
 }
 
-declare void @llvm.lifetime.start.p5(i64, ptr addrspace(5) nocapture) #1
-declare void @llvm.lifetime.end.p5(i64, ptr addrspace(5) nocapture) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
index 9a22635e880f1..3e1070c95650e 100644
--- a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
+++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
@@ -2938,18 +2938,6 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
   ret void
 }
 
-
-
-declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #1
-
-declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
-
-declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) #1
-
-declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 
diff --git a/llvm/test/CodeGen/AMDGPU/cc-sgpr-limit.ll b/llvm/test/CodeGen/AMDGPU/cc-sgpr-limit.ll
index fe173bf897539..97c59291ff77e 100644
--- a/llvm/test/CodeGen/AMDGPU/cc-sgpr-limit.ll
+++ b/llvm/test/CodeGen/AMDGPU/cc-sgpr-limit.ll
@@ -134,5 +134,3 @@ define amdgpu_gs void @_amdgpu_gs_sgpr_limit_i64(i64 inreg, i64 inreg, i64 inreg
   call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> %36, ptr addrspace(8) %addr, i32 4, i32 0, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/cc-sgpr-over-limit.ll b/llvm/test/CodeGen/AMDGPU/cc-sgpr-over-limit.ll
index b427aee3fc370..5b75f95c1e978 100644
--- a/llvm/test/CodeGen/AMDGPU/cc-sgpr-over-limit.ll
+++ b/llvm/test/CodeGen/AMDGPU/cc-sgpr-over-limit.ll
@@ -97,5 +97,3 @@ define amdgpu_gs { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i
   %135 = insertvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %134, i32 %90, 44
   ret { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %135
 }
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
index fdae1696a5a49..2e0ae0f3d43c6 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
@@ -854,8 +854,6 @@ done:
   ret void
 }
 
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
 attributes #2 = { nounwind argmemonly }
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
index 1588dde19cfb7..bcb5334bf36b9 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
@@ -62,9 +62,6 @@ done:
   ret void
 }
 
-declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #0
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-
 attributes #0 = { argmemonly nounwind }
 attributes #1 = { nounwind readnone willreturn }
 attributes #2 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
index ac50fb86c96f7..a0c787adc4ebf 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
@@ -65,9 +65,6 @@ done:
   ret void
 }
 
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) #2
-
 attributes #0 = { argmemonly nounwind }
 attributes #1 = { nounwind readnone willreturn }
 attributes #2 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
index 49f9f695409b1..893ace241d0b6 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -740,10 +740,6 @@ done:
   ret void
 }
 
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) nocapture, i1 immarg) #3
-declare i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) nocapture, i1 immarg) #3
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
 attributes #2 = { nounwind argmemonly }
diff --git a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
index 0a0179e866cd3..ec67171fe71c5 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
@@ -1603,27 +1603,6 @@ define <2 x half> @v_clamp_cvt_pkrtz_src_v2f16_denorm(float %a, float %b) #0 {
   ret <2 x half> %clamp
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.floor.f32(float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
-declare double @llvm.fabs.f64(double) #1
-declare double @llvm.minnum.f64(double, double) #1
-declare double @llvm.maxnum.f64(double, double) #1
-declare half @llvm.fabs.f16(half) #1
-declare half @llvm.minnum.f16(half, half) #1
-declare half @llvm.maxnum.f16(half, half) #1
-declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
-declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
-declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
-declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-
-
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
-
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "denormal-fp-math-f32"="ieee.ieee" }
diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll
index 947284506a297..e03c17e83c5d4 100644
--- a/llvm/test/CodeGen/AMDGPU/clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -4044,21 +4044,6 @@ define amdgpu_kernel void @v_clamp_diff_source_f32(ptr addrspace(1) %out, ptr ad
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
-declare double @llvm.fabs.f64(double) #1
-declare double @llvm.minnum.f64(double, double) #1
-declare double @llvm.maxnum.f64(double, double) #1
-declare half @llvm.fabs.f16(half) #1
-declare half @llvm.minnum.f16(half, half) #1
-declare half @llvm.maxnum.f16(half, half) #1
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
-declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
-declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
-
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-nans-fp-math"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
index b6948dab6bf9f..5e1150fe361a9 100644
--- a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
@@ -480,8 +480,3 @@ entry:
   call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %val, i32 15, i32 %x, i32 %y, <8 x i32> %dst, i32 0, i32 0)
   ret void
 }
-
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
-declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll b/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
index 643f2619840a2..59f4b6d556298 100644
--- a/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
+++ b/llvm/test/CodeGen/AMDGPU/cndmask-no-def-vcc.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i1 @llvm.amdgcn.class.f32(float, i32)
-
 ; Produces error after adding an implicit def to v_cndmask_b32
 
 ; GCN-LABEL: {{^}}vcc_shrink_vcc_def:
diff --git a/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll b/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
index 9edf566335925..7bb54009057a2 100644
--- a/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
@@ -33,5 +33,3 @@ entry:
   store <8 x i16> %vecinit471, ptr addrspace(1) undef, align 16
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer_distribute.ll b/llvm/test/CodeGen/AMDGPU/coalescer_distribute.ll
index 2d8ef336f52d6..15b723dc72a69 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer_distribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/coalescer_distribute.ll
@@ -48,5 +48,3 @@ bb32:
 bb34:
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/coalescer_remat.ll b/llvm/test/CodeGen/AMDGPU/coalescer_remat.ll
index 12057caec6e84..101b1df750622 100644
--- a/llvm/test/CodeGen/AMDGPU/coalescer_remat.ll
+++ b/llvm/test/CodeGen/AMDGPU/coalescer_remat.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -verify-machineinstrs -mtriple=amdgcn-- -o - %s | FileCheck %s
 
-declare float @llvm.fma.f32(float, float, float)
-
 ; This checks that rematerialization support of the coalescer does not
 ; unnecessarily widen the register class. Without those fixes > 20 VGprs
 ; are used here
diff --git a/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll b/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
index 74b9d98257fb3..948e75abda407 100644
--- a/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
+++ b/llvm/test/CodeGen/AMDGPU/code-size-estimate.ll
@@ -3,9 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX11,GFX1100 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX11,GFX1150 %s
 
-declare float @llvm.fabs.f32(float)
-declare float @llvm.fma.f32(float, float, float)
-
 define float @v_mul_f32_vop2(float %x, float %y) {
 ; GFX9-LABEL: v_mul_f32_vop2:
 ; GFX9:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
index 6bc8d29b3bf7c..0830034cd3a44 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
@@ -1431,9 +1431,6 @@ bb12:                                             ; preds = %bb10
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare void @llvm.amdgcn.s.barrier() #1
-
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind convergent }
 attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
index a25078230385e..d745c65e4eff8 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-add-zext-xor.ll
@@ -490,7 +490,6 @@ bb9:                                              ; preds = %bb, %.a
 
 
 ; Function Attrs: nounwind readonly willreturn
-declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) nocapture, i32, i32, i32 immarg) #0
 
 attributes #0 = { nounwind willreturn memory(argmem: read) }
 
diff --git a/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll b/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
index b763bec89eef3..d44e16a92d7b4 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
@@ -20,8 +20,4 @@ bb:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
index 33c0d90f94a39..849c2a6257300 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
@@ -740,10 +740,4 @@ bb:
   ret void
 }
 
-declare i1 @llvm.amdgcn.class.f32(float, i32) #0
-
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/combine-ftrunc.ll b/llvm/test/CodeGen/AMDGPU/combine-ftrunc.ll
index 2cbd1b47a87d6..c28767e4b888c 100644
--- a/llvm/test/CodeGen/AMDGPU/combine-ftrunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine-ftrunc.ll
@@ -80,13 +80,3 @@ define amdgpu_kernel void @combine_ftrunc_ftrunc_f32(ptr addrspace(1) %p) {
   store float %trunc, ptr addrspace(1) %p, align 4
   ret void
 }
-
-declare double @llvm.trunc.f64(double)
-declare float @llvm.trunc.f32(float)
-declare <2 x float> @llvm.trunc.v2f32(<2 x float>)
-declare double @llvm.rint.f64(double)
-declare float @llvm.rint.f32(float)
-declare <2 x float> @llvm.rint.v2f32(<2 x float>)
-declare float @llvm.ceil.f32(float)
-declare float @llvm.floor.f32(float)
-declare float @llvm.nearbyint.f32(float)
diff --git a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
index 10d71a315fbf9..bfb4a8f30aa58 100644
--- a/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
+++ b/llvm/test/CodeGen/AMDGPU/combine_andor_with_cmps.ll
@@ -3190,10 +3190,5 @@ define i1 @test153(double %arg1, double %arg2, double %arg3) {
   ret i1 %or1
 }
 
-declare double @llvm.canonicalize.f64(double)
-declare float @llvm.canonicalize.f32(float)
-declare half @llvm.canonicalize.f16(half)
-declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>)
-
 attributes #0 = { nounwind "amdgpu-ieee"="false" }
 attributes #1 = { nounwind "unsafe-fp-math"="true" "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/commute-compares.ll b/llvm/test/CodeGen/AMDGPU/commute-compares.ll
index d94e75c8c8e22..63f7f6afeaf74 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-compares.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-compares.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 ; --------------------------------------------------------------------------------
 ; i32 compares
 ; --------------------------------------------------------------------------------
diff --git a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
index 849348a7be53d..0da522dfaa0e0 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
@@ -43,9 +43,6 @@ bb:
   ret float %tmp9
 }
 
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll b/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll
index d9eca0d1cca9a..dc13662643e60 100644
--- a/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute_modifiers.ll
@@ -1,9 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.fma.f32(float, float, float) nounwind readnone
-
 ; FUNC-LABEL: @commute_add_imm_fabs_f32
 ; SI: buffer_load_dword [[X:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
 ; SI: v_add_f32_e64 [[REG:v[0-9]+]], |[[X]]|, 2.0
diff --git a/llvm/test/CodeGen/AMDGPU/complex-folding.ll b/llvm/test/CodeGen/AMDGPU/complex-folding.ll
index cf19b92e3fd8c..50e0473c79968 100644
--- a/llvm/test/CodeGen/AMDGPU/complex-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/complex-folding.ll
@@ -14,4 +14,3 @@ entry:
 }
 
 declare float @fabsf(float ) readnone
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/computeNumSignBits-mul.ll b/llvm/test/CodeGen/AMDGPU/computeNumSignBits-mul.ll
index 11795cca18daa..fd240f5338a41 100644
--- a/llvm/test/CodeGen/AMDGPU/computeNumSignBits-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/computeNumSignBits-mul.ll
@@ -168,5 +168,3 @@ define i32 @known_bits_mul24() {
   %r1 = shl i32 %r0, 2
   ret i32 %r1
 }
-
-declare i32 @llvm.amdgcn.mul.i24(i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll b/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
index 8cb7d6651a08c..d036c794c59d7 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
@@ -311,12 +311,8 @@ main_body:
 }
 
 ; Function Attrs: nounwind readnone speculatable
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
 
 ; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #7
-
-declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) #7
 
 !0 = !{}
 
diff --git a/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll b/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll
index 236dee7c3b825..25f3baf27cdfc 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/constant-fold-mi-operands.ll
@@ -136,9 +136,5 @@ define amdgpu_kernel void @fold_mi_and_neg1(ptr addrspace(1) %out) {
   ret void
 }
 
-declare i64 @llvm.ctpop.i64(i64) #1
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare i32 @llvm.amdgcn.groupstaticsize() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
index 789150f690d52..6d01fe0061a2d 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
@@ -271,8 +271,6 @@ endif:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 
diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-optnone.ll b/llvm/test/CodeGen/AMDGPU/control-flow-optnone.ll
index 5ceea9ef63a4a..87557cc7751c7 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-optnone.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-optnone.ll
@@ -49,7 +49,5 @@ bb14:                                             ; preds = %bb13, %bb9
   br i1 %tmp15, label %bb8.loopexit, label %bb9
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind optnone noinline }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll b/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll
index 6beccce9400e5..673c547fc9310 100644
--- a/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll
+++ b/llvm/test/CodeGen/AMDGPU/convergence-tokens.ll
@@ -110,11 +110,5 @@ define void @tail_call_void_func_void() #0 {
 declare hidden void @external_void_func_void() #0
 declare i32 @foo(i32 %x) #0
 
-declare i32 @llvm.amdgcn.readfirstlane(i32) #0
-
-declare token @llvm.experimental.convergence.entry()
-declare token @llvm.experimental.convergence.anchor()
-declare token @llvm.experimental.convergence.loop()
-
 attributes #0 = { nounwind readnone convergent }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/convergent-inlineasm.ll b/llvm/test/CodeGen/AMDGPU/convergent-inlineasm.ll
index bd523d4ac30b9..9ae568509bfb1 100644
--- a/llvm/test/CodeGen/AMDGPU/convergent-inlineasm.ll
+++ b/llvm/test/CodeGen/AMDGPU/convergent-inlineasm.ll
@@ -1,6 +1,4 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
-
-declare i32 @llvm.amdgcn.workitem.id.x() #0
 ; GCN-LABEL: {{^}}convergent_inlineasm:
 ; GCN: %bb.0:
 ; GCN: v_cmp_ne_u32_e64
diff --git a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
index e9dbce9026ca0..830e4f715abcb 100644
--- a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
+++ b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=SI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-sdwa-peephole=0 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=VI %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
-
 define amdgpu_kernel void @test_copy_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
 ; SI-LABEL: test_copy_v4i8:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll b/llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll
index a0e76f9a47a8a..9492326c90535 100644
--- a/llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll
+++ b/llvm/test/CodeGen/AMDGPU/copy-to-reg-scc-clobber.ll
@@ -60,5 +60,3 @@ entry:
   store i32 %cond14.1, ptr addrspace(1) %pout.coerce, align 4
   ret void
 }
-
-declare i32 @llvm.smin.i32(i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/copy_to_scc.ll b/llvm/test/CodeGen/AMDGPU/copy_to_scc.ll
index 7dd95a02f136b..1e481257f1bb3 100644
--- a/llvm/test/CodeGen/AMDGPU/copy_to_scc.ll
+++ b/llvm/test/CodeGen/AMDGPU/copy_to_scc.ll
@@ -35,5 +35,3 @@ entry:                                             ; preds = %1009
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
-
-declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/cse-convergent.ll b/llvm/test/CodeGen/AMDGPU/cse-convergent.ll
index 0d74bd39b56fe..c21d838e03508 100644
--- a/llvm/test/CodeGen/AMDGPU/cse-convergent.ll
+++ b/llvm/test/CodeGen/AMDGPU/cse-convergent.ll
@@ -67,7 +67,3 @@ end:
   %ret = add i32 %x3, %y
   ret i32 %ret
 }
-
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32)
-declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg)
-declare i32 @llvm.amdgcn.strict.wwm.i32(i32)
diff --git a/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll b/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll
index c98da81264744..fefe7023cc336 100644
--- a/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll
+++ b/llvm/test/CodeGen/AMDGPU/cse-phi-incoming-val.ll
@@ -36,7 +36,6 @@ bb10:                                             ; preds = %bb9, %bb5, %bb3, %b
 }
 
 ; Function Attrs: inaccessiblememonly nounwind
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #1
 
 attributes #0 = { nounwind }
 attributes #1 = { inaccessiblememonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index 4decf39d04013..84e96b6b466ba 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -6,20 +6,6 @@
 ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -enable-var-scope --check-prefix=GFX10-GISEL
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -enable-var-scope --check-prefix=GFX11
 
-declare i7 @llvm.ctlz.i7(i7, i1) nounwind readnone
-declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
-declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
-
-declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
-declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
-declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
-
-declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
-declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone
-declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone
-
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 define amdgpu_kernel void @s_ctlz_i32(ptr addrspace(1) noalias %out, i32 %val) nounwind {
 ; SI-LABEL: s_ctlz_i32:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
index 21aff62b9226d..809a7641f00e8 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -4,20 +4,6 @@
 ; RUN: llc -mtriple=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-GISEL %s
 
-declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
-
-declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
-
-declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
-declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
-declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
-
-declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
-declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone
-declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone
-
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 define amdgpu_kernel void @s_ctlz_zero_undef_i32(ptr addrspace(1) noalias %out, i32 %val) nounwind {
 ; SI-LABEL: s_ctlz_zero_undef_i32:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop.ll b/llvm/test/CodeGen/AMDGPU/ctpop.ll
index e1d20095573da..01634effff9df 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop.ll
@@ -2,14 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=FUNC -check-prefix=VI %s
 ; RUN: llc -mtriple=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG -check-prefix=FUNC %s
 
-declare i32 @llvm.ctpop.i32(i32) nounwind readnone
-declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32>) nounwind readnone
-declare <8 x i32> @llvm.ctpop.v8i32(<8 x i32>) nounwind readnone
-declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>) nounwind readnone
-
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 ; FUNC-LABEL: {{^}}s_ctpop_i32:
 ; GCN: s_load_dword [[SVAL:s[0-9]+]],
 ; GCN: s_bcnt1_i32_b32 [[SRESULT:s[0-9]+]], [[SVAL]]
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop16.ll b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
index b6359f1816979..407141e143e11 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop16.ll
@@ -3,14 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
 ; RUN: llc -mtriple=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=EG %s
 
-declare i16 @llvm.ctpop.i16(i16) nounwind readnone
-declare <2 x i16> @llvm.ctpop.v2i16(<2 x i16>) nounwind readnone
-declare <4 x i16> @llvm.ctpop.v4i16(<4 x i16>) nounwind readnone
-declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16>) nounwind readnone
-declare <16 x i16> @llvm.ctpop.v16i16(<16 x i16>) nounwind readnone
-
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 define amdgpu_kernel void @s_ctpop_i16(ptr addrspace(1) noalias %out, i16 %val) nounwind {
 ; SI-LABEL: s_ctpop_i16:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/ctpop64.ll b/llvm/test/CodeGen/AMDGPU/ctpop64.ll
index 131ce14a7847c..82e462527dc4c 100644
--- a/llvm/test/CodeGen/AMDGPU/ctpop64.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctpop64.ll
@@ -2,17 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
-declare i64 @llvm.ctpop.i64(i64) nounwind readnone
-declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64>) nounwind readnone
-declare <4 x i64> @llvm.ctpop.v4i64(<4 x i64>) nounwind readnone
-declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>) nounwind readnone
-declare <16 x i64> @llvm.ctpop.v16i64(<16 x i64>) nounwind readnone
-
-declare i65 @llvm.ctpop.i65(i65) nounwind readnone
-declare i128 @llvm.ctpop.i128(i128) nounwind readnone
-
 define amdgpu_kernel void @s_ctpop_i64(ptr addrspace(1) noalias %out, [8 x i32], i64 %val) nounwind {
 ; SI-LABEL: s_ctpop_i64:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll
index 118d6c123046b..e49b80b689a41 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz.ll
@@ -5,20 +5,6 @@
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -enable-var-scope --check-prefix=GFX10
 ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -enable-var-scope --check-prefix=GFX10-GISEL
 
-declare i7 @llvm.cttz.i7(i7, i1) nounwind readnone
-declare i8 @llvm.cttz.i8(i8, i1) nounwind readnone
-declare i16 @llvm.cttz.i16(i16, i1) nounwind readnone
-
-declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
-declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
-declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone
-
-declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
-declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1) nounwind readnone
-declare <4 x i64> @llvm.cttz.v4i64(<4 x i64>, i1) nounwind readnone
-
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 define amdgpu_kernel void @s_cttz_i32(ptr addrspace(1) noalias %out, i32 %val) nounwind {
 ; SI-LABEL: s_cttz_i32:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
index 71f1cd54d705c..32dc337a98971 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
@@ -4,15 +4,6 @@
 ; RUN: llc -mtriple=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9-GISEL %s
 
-declare i7 @llvm.cttz.i7(i7, i1) nounwind readnone
-declare i8 @llvm.cttz.i8(i8, i1) nounwind readnone
-declare i16 @llvm.cttz.i16(i16, i1) nounwind readnone
-declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
-declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
-declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1) nounwind readnone
-declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1) nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 define amdgpu_kernel void @s_cttz_zero_undef_i32(ptr addrspace(1) noalias %out, i32 %val) nounwind {
 ; SI-LABEL: s_cttz_zero_undef_i32:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/cube.ll b/llvm/test/CodeGen/AMDGPU/cube.ll
index 49d8276eda418..ab39073e0af86 100644
--- a/llvm/test/CodeGen/AMDGPU/cube.ll
+++ b/llvm/test/CodeGen/AMDGPU/cube.ll
@@ -1,11 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare float @llvm.amdgcn.cubeid(float, float, float) #0
-declare float @llvm.amdgcn.cubesc(float, float, float) #0
-declare float @llvm.amdgcn.cubetc(float, float, float) #0
-declare float @llvm.amdgcn.cubema(float, float, float) #0
-
 ; GCN-LABEL: {{^}}cube:
 ; GCN-DAG: v_cubeid_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DAG: v_cubesc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
index 028a28ed9a23b..7ff111c467dcd 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -5,9 +5,6 @@
 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx908 -start-before=amdgpu-isel -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX9
 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
-
 define float @v_uitofp_i32_to_f32_mask255(i32 %arg0) nounwind {
 ; GCN-LABEL: v_uitofp_i32_to_f32_mask255:
 ; GCN:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll b/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
index c34d669145dde..3e4c33de71bcf 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_flr_i32_f32.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.floor.f32(float) #1
-
 ; FUNC-LABEL: {{^}}cvt_flr_i32_f32_0:
 ; SI-SAFE-NOT: v_cvt_flr_i32_f32
 ; SI-NOT: add
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll b/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll
index d4bafa12af129..0091458479a26 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_rpi_i32_f32.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -enable-no-nans-fp-math -verify-machineinstrs < %s | FileCheck -check-prefix=SI-NONAN -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=SI-SAFE -check-prefix=SI -check-prefix=FUNC %s
 
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.floor.f32(float) #1
-
 ; FUNC-LABEL: {{^}}cvt_rpi_i32_f32:
 ; SI-SAFE-NOT: v_cvt_rpi_i32_f32
 ; SI-NONAN: v_cvt_rpi_i32_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}
diff --git a/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll b/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
index fed4b9862dbfb..5a7b0292cefe1 100644
--- a/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
@@ -901,22 +901,3 @@ define protected amdgpu_kernel void @buffer.ptr.atomic.fmax(ptr addrspace(8) %rs
   store float 1.0, ptr addrspace(1) %p1
   ret void
 }
-
-declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1), double)
-declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1), double)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32, ptr addrspace(8), i32, i32, i32)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32, ptr addrspace(8), i32, i32, i32)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32, ptr addrspace(8), i32, i32, i32)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32, ptr addrspace(8), i32, i32, i32)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32, ptr addrspace(8), i32, i32, i32)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32, ptr addrspace(8), i32, i32, i32)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32, ptr addrspace(8), i32, i32, i32)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32, ptr addrspace(8), i32, i32, i32)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.inc.i32(i32, ptr addrspace(8), i32, i32, i32)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.dec.i32(i32, ptr addrspace(8), i32, i32, i32)
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32)
-declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32)
-declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32)
-declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll
index 09a1f45557608..09682e4dff0e4 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-crash.ll
@@ -112,5 +112,3 @@ bb:
   %ret = fadd float %i10, %i13
   ret float %ret
 }
-
-declare float @llvm.maxnum.f32(float, float)
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
index 451f64f71282b..c0b0f06e1aea2 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
@@ -409,28 +409,20 @@ define amdgpu_ps float @fma_vs_output_modifier_2(float %x) #0 {
 }
 
 ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.maxnum.f32(float, float) #1
 
 ; Function Attrs: nounwind readnone speculatable willreturn
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #2
 
 ; Function Attrs: nounwind readonly willreturn
-declare <2 x float> @llvm.amdgcn.image.sample.2d.v2f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
 
 ; Function Attrs: nounwind readonly willreturn
-declare float @llvm.amdgcn.image.sample.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
 
 ; Function Attrs: nounwind readonly willreturn
-declare <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
 
 ; Function Attrs: nounwind readonly willreturn
-declare <3 x float> @llvm.amdgcn.image.load.mip.2d.v3f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #3
 
 ; Function Attrs: nounwind readnone willreturn
-declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) #3
 
 ; Function Attrs: nounwind readnone willreturn
-declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32 immarg) #3
 
 attributes #0 = { "denormal-fp-math-f32"="preserve-sign" }
 attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll
index 18b250d148f5a..77941423b62c8 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-reassociate-bug.ll
@@ -28,6 +28,4 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
index cdd4c72f3717f..cb2768d8d20c3 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
+++ b/llvm/test/CodeGen/AMDGPU/debug-value-scheduler-crash.mir
@@ -2,8 +2,6 @@
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machine-scheduler -verify-machineinstrs %s -o - | FileCheck %s
 --- |
 
-  declare void @llvm.dbg.value(metadata, metadata, metadata) #0
-
   define amdgpu_kernel void @could_not_use_debug_inst_to_query_mi2mimap() #1 {
     ret void
   }
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value.ll b/llvm/test/CodeGen/AMDGPU/debug-value.ll
index 127b23ed3bb23..12c770d85feb3 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/debug-value.ll
@@ -83,7 +83,6 @@ bb28:                                             ; preds = %bb25, %bb21
 
 declare float @barney() #2
 declare void @eggs(float) #2
-declare void @llvm.dbg.value(metadata, metadata, metadata) #1
 
 attributes #0 = { convergent nounwind "target-cpu"="gfx900" }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/debug-value2.ll b/llvm/test/CodeGen/AMDGPU/debug-value2.ll
index 1d4c11de4076c..b775bfd5dbf25 100644
--- a/llvm/test/CodeGen/AMDGPU/debug-value2.ll
+++ b/llvm/test/CodeGen/AMDGPU/debug-value2.ll
@@ -2,10 +2,6 @@
 
 %struct.ShapeData = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, i32, i32, i64, <4 x float>, i32, i8, i8, i16, i32, i32 }
 
-declare float @llvm.fmuladd.f32(float, float, float)
-
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
-
 declare ptr addrspace(1) @Scene_getSubShapeData(i32, ptr addrspace(1), ptr addrspace(1)) local_unnamed_addr
 
 define <4 x float> @Scene_transformT(i32 %subshapeIdx, <4 x float> %v, float %time, ptr addrspace(1) %gScene, ptr addrspace(1) %gSceneOffsets) local_unnamed_addr !dbg !110 {
@@ -268,14 +264,6 @@ qtSet.exit:                                       ; preds = %bb141, %entry
   ret <4 x float> %tmp162
 }
 
-declare float @llvm.copysign.f32(float, float)
-
-declare i1 @llvm.amdgcn.class.f32(float, i32)
-
-declare float @llvm.amdgcn.rsq.f32(float)
-
-declare void @llvm.dbg.value(metadata, metadata, metadata)
-
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!104, !105, !106}
 !opencl.ocl.version = !{!107}
diff --git a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
index 49486adda6f2e..6af669a9dc477 100644
--- a/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
+++ b/llvm/test/CodeGen/AMDGPU/default-fp-mode.ll
@@ -168,8 +168,6 @@ entry:
   ret float %tmp1
 }
 
-declare void @llvm.amdgcn.kill(i1)
-
 attributes #0 = { nounwind "target-cpu"="tahiti" }
 attributes #1 = { nounwind "target-cpu"="fiji" }
 attributes #2 = { nounwind "denormal-fp-math"="ieee,ieee" }
diff --git a/llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll b/llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll
index 730df533b731a..7a14a7dbab980 100644
--- a/llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll
+++ b/llvm/test/CodeGen/AMDGPU/disable_form_clauses.ll
@@ -62,7 +62,5 @@ bb:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 attributes #0 = { "amdgpu-max-memory-clause"="1" }
 
diff --git a/llvm/test/CodeGen/AMDGPU/diverge-extra-formal-args.ll b/llvm/test/CodeGen/AMDGPU/diverge-extra-formal-args.ll
index af274a9ab1c82..3d6cb50ec638f 100644
--- a/llvm/test/CodeGen/AMDGPU/diverge-extra-formal-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/diverge-extra-formal-args.ll
@@ -23,8 +23,5 @@ define dllexport amdgpu_vs void @_amdgpu_vs_main(i32 inreg %arg, i32 inreg %arg1
   ret void
 }
 
-declare i64 @llvm.amdgcn.s.getpc() #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
-
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/diverge-interp-mov-lower.ll b/llvm/test/CodeGen/AMDGPU/diverge-interp-mov-lower.ll
index 243618ae5a63e..8e7f35a169ded 100644
--- a/llvm/test/CodeGen/AMDGPU/diverge-interp-mov-lower.ll
+++ b/llvm/test/CodeGen/AMDGPU/diverge-interp-mov-lower.ll
@@ -25,10 +25,6 @@ define dllexport amdgpu_ps void @_amdgpu_ps_main(i32 inreg %arg) local_unnamed_a
   ret void
 }
 
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #2
-
 attributes #0 = { nounwind "InitialPSInputAddr"="0" }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll b/llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll
index 2ef7bfb9b857f..3a1a831ff91bc 100644
--- a/llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll
+++ b/llvm/test/CodeGen/AMDGPU/diverge-switch-default.ll
@@ -59,7 +59,5 @@ sw.exit:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { convergent noinline optnone }
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-at-use.ll b/llvm/test/CodeGen/AMDGPU/divergence-at-use.ll
index 422be3fa18e97..285d7905f0fa4 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-at-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-at-use.ll
@@ -40,5 +40,3 @@ exit:
   %old = atomicrmw add ptr addrspace(3) %gep, i32 %x acq_rel
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
index c135acaaf4744..09854f2de84c8 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-abs.ll
@@ -65,7 +65,5 @@ define amdgpu_kernel void @v_abs_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-bfe-isel.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-bfe-isel.ll
index 3e198b6dca8f0..a673437d4901d 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-bfe-isel.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-bfe-isel.ll
@@ -20,6 +20,3 @@ define amdgpu_kernel void @bfe_divergent(i32 %val, ptr addrspace(1) %out) {
   ret void
 }
 
-
-declare i32 @llvm.amdgcn.workitem.id.x()
-
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-bitreverse.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-bitreverse.ll
index d5597df08aa26..92a8e1a569669 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-bitreverse.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-bitreverse.ll
@@ -37,7 +37,3 @@ define amdgpu_kernel void @divergent_bitreverse_i64(i64 %val, ptr addrspace(1) %
   store i64 %res, ptr addrspace(1) %out
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.bitreverse.i32(i32)
-declare i64 @llvm.bitreverse.i64(i64)
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-ctlz-cttz.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-ctlz-cttz.ll
index 45cb4c623a4b2..98bb18d97f505 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-ctlz-cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-ctlz-cttz.ll
@@ -56,9 +56,3 @@ define amdgpu_kernel void @v_flbit(ptr addrspace(1) noalias %out, ptr addrspace(
   ret void
 }
 
-
-declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
-declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
-declare i32 @llvm.amdgcn.sffbh.i32(i32)
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-ctpop.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-ctpop.ll
index 5c5b7b3e5b9a6..c556635ba5b9e 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-ctpop.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-ctpop.ll
@@ -45,9 +45,3 @@ define amdgpu_kernel void @v_ctpop_i64(ptr addrspace(1) noalias %out, ptr addrsp
   store i32 %truncctpop, ptr addrspace(1) %out, align 4
   ret void
 }
-
-declare i64 @llvm.ctpop.i64(i64) nounwind readnone
-
-declare i32 @llvm.ctpop.i32(i32) nounwind readnone
-
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-negsubinlineconst.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-negsubinlineconst.ll
index a7f3c18accc93..2bbb0d19120e8 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-negsubinlineconst.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-negsubinlineconst.ll
@@ -22,7 +22,5 @@ define amdgpu_kernel void @divergent_add_SIC(ptr addrspace(1) %out, ptr addrspac
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-sext-inreg.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-sext-inreg.ll
index 8c3155fc5c6ea..7acdfdbe34d59 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-sext-inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-sext-inreg.ll
@@ -89,7 +89,5 @@ define amdgpu_kernel void @divergent_sext_in_reg_i16_to_i32(ptr addrspace(1) %ou
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-xnor.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-xnor.ll
index ccfc908b40052..75bd448a4290b 100644
--- a/llvm/test/CodeGen/AMDGPU/divergence-driven-xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-xnor.ll
@@ -40,5 +40,3 @@ define i32 @divergent_xnor_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
   %res = xor i32 %xor, -1
   ret i32 %res
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x() #0
diff --git a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
index 757458363284c..afacc1e7a6afe 100644
--- a/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
+++ b/llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
@@ -122,13 +122,10 @@ endloop:                                          ; preds = %if1, %Flow2
 }
 
 ; Function Attrs: nounwind readnone speculatable willreturn
-declare float @llvm.sqrt.f32(float) #0
 
 ; Function Attrs: nounwind readnone speculatable
-declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #1
 
 ; Function Attrs: inaccessiblememonly nounwind writeonly
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #2
 
 attributes #0 = { nounwind readnone speculatable willreturn }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll b/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll
index dc79385d9eaca..318d70238c295 100644
--- a/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll
+++ b/llvm/test/CodeGen/AMDGPU/divrem24-assume.ll
@@ -37,7 +37,4 @@ bb:
   ret void
 }
 
-declare void @llvm.assume(i1)
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 !0 = !{i32 0, i32 1024}
diff --git a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
index b4218bc2afc7f..caefff3a5ea82 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/dpp64_combine.ll
@@ -69,9 +69,4 @@ define amdgpu_kernel void @dpp64_div(ptr addrspace(1) %arg, i64 %in1) {
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i64 @llvm.amdgcn.update.dpp.i64(i64, i64, i32, i32, i32, i1) #0
-declare double @llvm.ceil.f64(double)
-declare double @llvm.amdgcn.rcp.f64(double)
-
 attributes #0 = { nounwind readnone convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll
index cfc166ec798f4..c2dbfdc9a86d2 100644
--- a/llvm/test/CodeGen/AMDGPU/dpp_combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/dpp_combine.ll
@@ -47,9 +47,4 @@ define amdgpu_kernel void @dpp_fadd(ptr addrspace(1) %arg) {
   ret void
 }
 
-
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0
-declare float @llvm.ceil.f32(float)
-
 attributes #0 = { nounwind readnone convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll b/llvm/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
index 91962c19dac8d..b07420644cf7b 100644
--- a/llvm/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/drop-mem-operand-move-smrd.ll
@@ -41,7 +41,6 @@ exit:                                             ; preds = %for.body, %entry
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll b/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
index 26418b07be82b..8c990f628675c 100644
--- a/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs -mattr=+load-store-opt,+unsafe-ds-offset-folding < %s | FileCheck -check-prefix=CI --check-prefix=CHECK %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare void @llvm.amdgcn.s.barrier() #1
-
 ; Function Attrs: nounwind
 ; CHECK-LABEL: {{^}}signed_ds_offset_addressing_loop:
 ; CHECK: BB0_1:
diff --git a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
index 7b9b130e1cf79..9ff3786f8bca2 100644
--- a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
@@ -4,8 +4,6 @@
 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
 ; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 @lds.obj = addrspace(3) global [256 x i32] undef, align 4
 
 define amdgpu_kernel void @write_ds_sub0_offset0_global() #0 {
@@ -652,8 +650,6 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #
   ret void
 }
 
-declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1)
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
 attributes #2 = { nounwind convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/ds_gws_align.ll b/llvm/test/CodeGen/AMDGPU/ds_gws_align.ll
index 220f82f969754..c1cc2a2de83b0 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_gws_align.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_gws_align.ll
@@ -52,8 +52,3 @@ bb:
   call void @llvm.amdgcn.ds.gws.init(i32 %agpr.1, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.ds.gws.init(i32, i32)
-declare void @llvm.amdgcn.ds.gws.sema.br(i32, i32)
-declare void @llvm.amdgcn.ds.gws.barrier(i32, i32)
-declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
index 777a8f3fef1c1..90e23f271450e 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
@@ -1527,15 +1527,6 @@ entry:
 
 declare void @void_func_void() #3
 
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
-declare i32 @llvm.amdgcn.workgroup.id.y() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workitem.id.y() #1
-
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) nounwind readnone
-
-declare void @llvm.amdgcn.s.barrier() #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll b/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll
index 3aff5ccff4280..9b95a497856d7 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2_superreg.ll
@@ -199,10 +199,8 @@ define amdgpu_kernel void @simple_read2_v4f32_superreg_scalar_loads_align4(ptr a
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.y() #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll b/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll
index d15183e57c938..f91a1c93502b7 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll
@@ -294,8 +294,5 @@ define amdgpu_kernel void @byte_size_only_divisible_64_read2_f64(ptr addrspace(1
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workitem.id.y() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2.ll b/llvm/test/CodeGen/AMDGPU/ds_write2.ll
index 06908d21e5355..6f440a66e2076 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_write2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_write2.ll
@@ -1048,11 +1048,6 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
-declare i32 @llvm.amdgcn.workgroup.id.y() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workitem.id.y() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll b/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll
index 26ccc32e1bf67..267e8f502ea65 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_write2st64.ll
@@ -128,9 +128,6 @@ define amdgpu_kernel void @byte_size_only_divisible_64_write2st64_f64(ptr addrsp
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workitem.id.y() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/dual-source-blend-export.ll b/llvm/test/CodeGen/AMDGPU/dual-source-blend-export.ll
index 6626945f49c8c..10cf7d664fd67 100644
--- a/llvm/test/CodeGen/AMDGPU/dual-source-blend-export.ll
+++ b/llvm/test/CodeGen/AMDGPU/dual-source-blend-export.ll
@@ -88,14 +88,6 @@ define amdgpu_ps void @_amdgpu_ps_main(i32 inreg %PrimMask, <2 x float> %InterpC
   ret void
 }
 
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #2
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #2
-declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32 immarg) #3
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #4
-declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #1
-declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) #1
-declare float @llvm.amdgcn.lds.param.load(i32 immarg, i32 immarg, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable willreturn }
 attributes #2 = { nounwind readnone willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
index 16f16f56248cb..cc5017bb5d49d 100644
--- a/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
@@ -4,7 +4,6 @@
 ; Don't crash.
 
 ; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
 
 declare ptr @__kmpc_alloc_shared()
 
diff --git a/llvm/test/CodeGen/AMDGPU/elf.ll b/llvm/test/CodeGen/AMDGPU/elf.ll
index f51d9fc5125ba..3bbdeb085a1fe 100644
--- a/llvm/test/CodeGen/AMDGPU/elf.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf.ll
@@ -31,6 +31,4 @@ define amdgpu_ps void @test(i32 %p) #0 {
    ret void
 }
 
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/else.ll b/llvm/test/CodeGen/AMDGPU/else.ll
index 655c5cd184a1e..9d683db219e1e 100644
--- a/llvm/test/CodeGen/AMDGPU/else.ll
+++ b/llvm/test/CodeGen/AMDGPU/else.ll
@@ -52,9 +52,6 @@ end:
   ret void
 }
 
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 immarg, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind writeonly }
 attributes #2 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll b/llvm/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll
index 3ef0bb87f6e65..8abd62b3e00cd 100644
--- a/llvm/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll
+++ b/llvm/test/CodeGen/AMDGPU/enable-no-signed-zeros-fp-math.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn < %s | FileCheck --check-prefixes=GCN,GCN-FMF,GCN-SAFE %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 ; Test that the -enable-no-signed-zeros-fp-math flag works
 
 ; GCN-LABEL: {{^}}fneg_fsub_f32_fmf:
diff --git a/llvm/test/CodeGen/AMDGPU/endcf-loop-header.ll b/llvm/test/CodeGen/AMDGPU/endcf-loop-header.ll
index 00c5e0abf6506..c31d49d38851f 100644
--- a/llvm/test/CodeGen/AMDGPU/endcf-loop-header.ll
+++ b/llvm/test/CodeGen/AMDGPU/endcf-loop-header.ll
@@ -34,6 +34,4 @@ done:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll b/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll
index 2e9bfc891629a..44cfdb52c1dfb 100644
--- a/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extend-bit-ops-i16.ll
@@ -45,6 +45,4 @@ define amdgpu_kernel void @xor_zext(ptr addrspace(1) %out, ptr addrspace(1) %in)
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/extra-sroa-after-unroll.ll b/llvm/test/CodeGen/AMDGPU/extra-sroa-after-unroll.ll
index efdfd86107d29..7b3d95cc7d5d4 100644
--- a/llvm/test/CodeGen/AMDGPU/extra-sroa-after-unroll.ll
+++ b/llvm/test/CodeGen/AMDGPU/extra-sroa-after-unroll.ll
@@ -75,7 +75,3 @@ entry:
   tail call void @llvm.memcpy.p0.p0.i32(ptr %d, ptr %s, i32 %N8, i1 false)
   ret void
 }
-
-declare void @llvm.lifetime.start.p5(i64 immarg, ptr addrspace(5) nocapture)
-declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture)
-declare void @llvm.memcpy.p0.p0.i32(ptr nocapture, ptr nocapture, i32, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
index d0c0d3acf2717..162b6095bfad1 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_subvector_vec4_vec3.ll
@@ -31,6 +31,3 @@ main_body:
   call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> %tmp29, ptr addrspace(8) undef, i32 undef, i32 0, i32 0) #3
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
index 70011e56d016e..3d39b95133d1d 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
@@ -954,7 +954,5 @@ define amdgpu_kernel void @v_extractelement_v16f16_dynamic_sgpr(ptr addrspace(1)
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
index b69852da24744..d96d5f466bbc0 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
@@ -263,7 +263,5 @@ define amdgpu_kernel void @v_extractelement_v16i16_dynamic_sgpr(ptr addrspace(1)
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll b/llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll
index d5464ce6aa8a3..42ff4ecb8d7e5 100644
--- a/llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/extractelt-to-trunc.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=amdgcn-- -verify-machineinstrs | FileCheck %s -check-prefix=GCN
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 ; Make sure the add and load are reduced to 32-bits even with the
 ; bitcast to vector.
 define amdgpu_kernel void @bitcast_int_to_vector_extract_0(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %b) {
diff --git a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
index f34824cd6cefe..3d5ee54fe89a2 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.f16.ll
@@ -747,11 +747,6 @@ define amdgpu_kernel void @v_extract_fabs_no_fold_v2f16(ptr addrspace(1) %in) #0
   ret void
 }
 
-declare half @llvm.fabs.f16(half) #1
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
-declare <4 x half> @llvm.fabs.v4f16(<4 x half>) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 
diff --git a/llvm/test/CodeGen/AMDGPU/fabs.f64.ll b/llvm/test/CodeGen/AMDGPU/fabs.f64.ll
index 32d5fa6e72d79..8068ec3492eb3 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.f64.ll
@@ -1,11 +1,6 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 declare double @fabs(double) readnone
-declare double @llvm.fabs.f64(double) readnone
-declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone
-declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
 
 ; FUNC-LABEL: {{^}}v_fabs_f64:
 ; SI: v_and_b32
diff --git a/llvm/test/CodeGen/AMDGPU/fabs.ll b/llvm/test/CodeGen/AMDGPU/fabs.ll
index e18c76f89b6c7..2ed6465f7b112 100644
--- a/llvm/test/CodeGen/AMDGPU/fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fabs.ll
@@ -108,6 +108,3 @@ define amdgpu_kernel void @bitpreserve_fabsf_f32(ptr addrspace(1) %out, float %i
 }
 
 declare float @fabsf(float) readnone
-declare float @llvm.fabs.f32(float) readnone
-declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll b/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
index b36c6e707ebab..eb1da93f76049 100644
--- a/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fadd-fma-fmul-combine.ll
@@ -918,9 +918,6 @@ define amdgpu_kernel void @fast_sub_fmuladd_fpext_fmul_multi_use_fmuladd_rhs() #
   ret void
 }
 
-declare float @llvm.fma.f32(float, float, float) #1
-declare float @llvm.fmuladd.f32(float, float, float) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/llvm/test/CodeGen/AMDGPU/fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/fadd.f16.ll
index cdc6b5a48d0a6..6664a8dff9ca5 100644
--- a/llvm/test/CodeGen/AMDGPU/fadd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fadd.f16.ll
@@ -932,7 +932,5 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fadd64.ll b/llvm/test/CodeGen/AMDGPU/fadd64.ll
index 1d3a16e942cf0..78c0f8847ee88 100644
--- a/llvm/test/CodeGen/AMDGPU/fadd64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fadd64.ll
@@ -46,7 +46,5 @@ define amdgpu_kernel void @s_fadd_v2f64(ptr addrspace(1) %out, <2 x double> %r0,
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fail-select-buffer-atomic-fadd.ll b/llvm/test/CodeGen/AMDGPU/fail-select-buffer-atomic-fadd.ll
index 8ff78aaccf5a3..b5aec2e295985 100644
--- a/llvm/test/CodeGen/AMDGPU/fail-select-buffer-atomic-fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/fail-select-buffer-atomic-fadd.ll
@@ -14,6 +14,4 @@ define amdgpu_cs void @atomic_fadd(<4 x i32> inreg %arg0) {
   ret void
 }
 
-declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll
index f312f3138c08d..9f125d6a982a9 100644
--- a/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/fail.llvm.fptrunc.round.ll
@@ -7,5 +7,3 @@ define amdgpu_gs void @test_fptrunc_round_legalization(double %a, i32 %data0, <4
   store half %res, ptr addrspace(1) %out, align 4
   ret void
 }
-
-declare half @llvm.fptrunc.round.f64(double, metadata)
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index e1981972f58d1..f2ab2e2e5fa7e 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -859,33 +859,6 @@ define float @v_test_canonicalize_amdgcn_exp2(float %a) {
 ; in the .amd_amdgpu_isa "amdgcn-unknown-freebsd11.0--gfx802" directive
 ; GCN: .amd_amdgpu_isa
 
-declare float @llvm.canonicalize.f32(float) #0
-declare float @llvm.copysign.f32(float, float) #0
-declare float @llvm.amdgcn.fmul.legacy(float, float) #0
-declare float @llvm.amdgcn.fmad.ftz.f32(float, float, float) #0
-declare double @llvm.canonicalize.f64(double) #0
-declare half @llvm.canonicalize.f16(half) #0
-declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare float @llvm.sqrt.f32(float) #0
-declare float @llvm.ceil.f32(float) #0
-declare float @llvm.floor.f32(float) #0
-declare float @llvm.fma.f32(float, float, float) #0
-declare float @llvm.fmuladd.f32(float, float, float) #0
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.sin.f32(float) #0
-declare float @llvm.cos.f32(float) #0
-declare half @llvm.sin.f16(half) #0
-declare half @llvm.cos.f16(half) #0
-declare float @llvm.minnum.f32(float, float) #0
-declare float @llvm.maxnum.f32(float, float) #0
-declare double @llvm.maxnum.f64(double, double) #0
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #0
-declare float @llvm.amdgcn.cubeid(float, float, float) #0
-declare float @llvm.amdgcn.frexp.mant.f32(float) #0
-declare float @llvm.amdgcn.log.f32(float) #0
-declare float @llvm.amdgcn.exp2.f32(float) #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { "no-nans-fp-math"="true" }
 attributes #2 = { "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" }
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
index 581b7b4cff9ed..286a17f79dd98 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll
@@ -4,20 +4,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=CI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
 
-declare half @llvm.fabs.f16(half) #0
-declare half @llvm.canonicalize.f16(half) #0
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
-declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
-declare <3 x half> @llvm.canonicalize.v3f16(<3 x half>) #0
-declare <4 x half> @llvm.canonicalize.v4f16(<4 x half>) #0
-declare <6 x half> @llvm.canonicalize.v6f16(<6 x half>) #0
-declare <8 x half> @llvm.canonicalize.v8f16(<8 x half>) #0
-declare <12 x half> @llvm.canonicalize.v12f16(<12 x half>) #0
-declare <16 x half> @llvm.canonicalize.v16f16(<16 x half>) #0
-declare <32 x half> @llvm.canonicalize.v32f16(<32 x half>) #0
-declare <64 x half> @llvm.canonicalize.v64f16(<64 x half>) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 define amdgpu_kernel void @test_fold_canonicalize_undef_value_f16(ptr addrspace(1) %out) #1 {
 ; VI-LABEL: test_fold_canonicalize_undef_value_f16:
 ; VI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
index d53c0411ad88c..2725b186099dd 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize.ll
@@ -5,21 +5,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
 
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.canonicalize.f32(float) #0
-declare <2 x float> @llvm.canonicalize.v2f32(<2 x float>) #0
-declare <3 x float> @llvm.canonicalize.v3f32(<3 x float>) #0
-declare <4 x float> @llvm.canonicalize.v4f32(<4 x float>) #0
-declare <8 x float> @llvm.canonicalize.v8f32(<8 x float>) #0
-declare double @llvm.fabs.f64(double) #0
-declare double @llvm.canonicalize.f64(double) #0
-declare <2 x double> @llvm.canonicalize.v2f64(<2 x double>) #0
-declare <3 x double> @llvm.canonicalize.v3f64(<3 x double>) #0
-declare <4 x double> @llvm.canonicalize.v4f64(<4 x double>) #0
-declare half @llvm.canonicalize.f16(half) #0
-declare <2 x half> @llvm.canonicalize.v2f16(<2 x half>) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 define amdgpu_kernel void @v_test_canonicalize_var_f32(ptr addrspace(1) %out) #1 {
 ; GFX678-LABEL: v_test_canonicalize_var_f32:
 ; GFX678:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fceil.ll b/llvm/test/CodeGen/AMDGPU/fceil.ll
index 193ab9519764a..514eaf6d7abcb 100644
--- a/llvm/test/CodeGen/AMDGPU/fceil.ll
+++ b/llvm/test/CodeGen/AMDGPU/fceil.ll
@@ -2,13 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -mtriple=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
 
-declare float @llvm.ceil.f32(float) nounwind readnone
-declare <2 x float> @llvm.ceil.v2f32(<2 x float>) nounwind readnone
-declare <3 x float> @llvm.ceil.v3f32(<3 x float>) nounwind readnone
-declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone
-declare <8 x float> @llvm.ceil.v8f32(<8 x float>) nounwind readnone
-declare <16 x float> @llvm.ceil.v16f32(<16 x float>) nounwind readnone
-
 ; FUNC-LABEL: {{^}}fceil_f32:
 ; SI: v_ceil_f32_e32
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[RESULT:T[0-9]+\.[XYZW]]]
diff --git a/llvm/test/CodeGen/AMDGPU/fceil64.ll b/llvm/test/CodeGen/AMDGPU/fceil64.ll
index f34a64c470c4e..3581f2470bba1 100644
--- a/llvm/test/CodeGen/AMDGPU/fceil64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fceil64.ll
@@ -2,13 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=CI -check-prefix=FUNC %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=CI -check-prefix=FUNC %s
 
-declare double @llvm.ceil.f64(double) nounwind readnone
-declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone
-declare <3 x double> @llvm.ceil.v3f64(<3 x double>) nounwind readnone
-declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone
-declare <8 x double> @llvm.ceil.v8f64(<8 x double>) nounwind readnone
-declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone
-
 ; FUNC-LABEL: {{^}}fceil_f64:
 ; CI: v_ceil_f64_e32
 ; SI: s_bfe_u32 [[SEXP:s[0-9]+]], {{s[0-9]+}}, 0xb0014
diff --git a/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll b/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
index 7d8f43bbe16b7..1d9f3df4ceafc 100644
--- a/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcmp.f16.ll
@@ -2803,7 +2803,5 @@ entry:
   ret void
 }
 
-declare half @llvm.fabs.f16(half) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fconst64.ll b/llvm/test/CodeGen/AMDGPU/fconst64.ll
index ab5a38949295f..45aac37c1a48b 100644
--- a/llvm/test/CodeGen/AMDGPU/fconst64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fconst64.ll
@@ -14,7 +14,5 @@ define amdgpu_kernel void @fconst_f64(ptr addrspace(1) %out, ptr addrspace(1) %i
    ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
index b8936911f0576..885cce43e57bc 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f16.ll
@@ -4,14 +4,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -enable-var-scope --check-prefixes=GFX9 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -enable-var-scope --check-prefixes=GFX11 %s
 
-declare half @llvm.copysign.f16(half, half) #0
-declare float @llvm.copysign.f32(float, float) #0
-declare double @llvm.copysign.f64(double, double) #0
-declare <2 x half> @llvm.copysign.v2f16(<2 x half>, <2 x half>) #0
-declare <3 x half> @llvm.copysign.v3f16(<3 x half>, <3 x half>) #0
-declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 define amdgpu_kernel void @s_copysign_f16(ptr addrspace(1) %arg_out, half %mag, half %sign) {
 ; SI-LABEL: s_copysign_f16:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
index f48961c905f58..b25b2b38ba1a4 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f32.ll
@@ -1145,10 +1145,4 @@ define amdgpu_kernel void @s_test_copysign_f32_fpext_bf16(ptr addrspace(1) %out,
   ret void
 }
 
-declare float @llvm.copysign.f32(float, float) #0
-declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) #0
-declare <3 x float> @llvm.copysign.v3f32(<3 x float>, <3 x float>) #0
-declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) #0
-declare <5 x float> @llvm.copysign.v5f32(<5 x float>, <5 x float>) #0
-
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f32.r600.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f32.r600.ll
index c473175c42360..e9b8d83b403e3 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f32.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f32.r600.ll
@@ -1,9 +1,5 @@
 ; RUN: llc -mtriple=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG %s
 
-declare float @llvm.copysign.f32(float, float) nounwind readnone
-declare <2 x float> @llvm.copysign.v2f32(<2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) nounwind readnone
-
 ; EG: BFI_INT
 define amdgpu_kernel void @test_copysign_f32(ptr addrspace(1) %out, float %mag, float %sign) nounwind {
   %result = call float @llvm.copysign.f32(float %mag, float %sign)
diff --git a/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll b/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
index b5fa3fd9eccc1..a255e47ea2840 100644
--- a/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcopysign.f64.ll
@@ -3,11 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=SIVI,VI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s
 
-declare double @llvm.copysign.f64(double, double) #0
-declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) #0
-declare <3 x double> @llvm.copysign.v3f64(<3 x double>, <3 x double>) #0
-declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>) #0
-
 define amdgpu_kernel void @s_test_copysign_f64(ptr addrspace(1) %out, [8 x i32], double %mag, [8 x i32], double %sign) {
 ; SI-LABEL: s_test_copysign_f64:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll b/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
index b14b6421f56b4..d0b56170e966e 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv.f16.ll
@@ -2145,11 +2145,6 @@ define <2 x half> @v_neg_rsq_v2f16(<2 x half> %a) {
   ret <2 x half> %fdiv
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-declare half @llvm.sqrt.f16(half) #2
-declare half @llvm.fabs.f16(half) #2
-declare <2 x half> @llvm.sqrt.v2f16(<2 x half>) #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
index 5ae989603b31a..2fcb967ee007a 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv_flags.f32.ll
@@ -1834,6 +1834,4 @@ define float @v_recip_sqrt_f32_afn_ulp25_contract(float %x) {
   ret float %fdiv
 }
 
-declare float @llvm.sqrt.f32(float)
-
 !0 = !{float 2.500000e+00}
diff --git a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll
index e61f60f0fea55..985fcece0d9aa 100644
--- a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll
@@ -1,12 +1,6 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
 ; RUN: llvm-as -data-layout=A5 < %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
 
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.amdgcn.workgroup.id.x()
-declare void @llvm.amdgcn.s.barrier()
-
 @test_local.temp = internal addrspace(3) global [1 x i32] undef, align 4
 @test_global_local.temp = internal addrspace(3) global [1 x i32] undef, align 4
 
diff --git a/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll b/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll
index 8155ac7eb256e..d98fbf1b1489b 100644
--- a/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll
+++ b/llvm/test/CodeGen/AMDGPU/fence-lds-read2-write2.ll
@@ -61,9 +61,6 @@ bb:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare void @llvm.amdgcn.s.barrier() #1
-
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { convergent nounwind }
 
diff --git a/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll b/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
index c25aea1bd355e..109028b566f26 100644
--- a/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
@@ -48,9 +48,6 @@ entry:
   ret void
 }
 
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-
 ; Function Attrs: readnone
-declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
 
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll b/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
index 69ddd82307c58..f63e20cb7b9ee 100644
--- a/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
+++ b/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
@@ -89,9 +89,6 @@ entry:
   ret void
 }
 
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-
 ; Function Attrs: readnone
-declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
 
 attributes #0 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll b/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll
index 09e96fea8387d..09d4882a6dee3 100644
--- a/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/ffloor.f64.ll
@@ -2,14 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 
-declare double @llvm.fabs.f64(double %Val)
-declare double @llvm.floor.f64(double) nounwind readnone
-declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone
-declare <3 x double> @llvm.floor.v3f64(<3 x double>) nounwind readnone
-declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone
-declare <8 x double> @llvm.floor.v8f64(<8 x double>) nounwind readnone
-declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone
-
 ; FUNC-LABEL: {{^}}ffloor_f64:
 ; CI: v_floor_f64_e32
 ; SI: v_fract_f64_e32
diff --git a/llvm/test/CodeGen/AMDGPU/ffloor.ll b/llvm/test/CodeGen/AMDGPU/ffloor.ll
index dda5c16ff288c..04e8ff7be36e2 100644
--- a/llvm/test/CodeGen/AMDGPU/ffloor.ll
+++ b/llvm/test/CodeGen/AMDGPU/ffloor.ll
@@ -38,12 +38,9 @@ define amdgpu_kernel void @floor_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
 }
 
 ; Function Attrs: nounwind readonly
-declare float @llvm.floor.f32(float) #0
 
 ; Function Attrs: nounwind readonly
-declare <2 x float> @llvm.floor.v2f32(<2 x float>) #0
 
 ; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.floor.v4f32(<4 x float>) #0
 
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-nondeterminism.ll b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-nondeterminism.ll
index a78a8a2ed8fe6..09420b0c90481 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-nondeterminism.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-nondeterminism.ll
@@ -48,5 +48,3 @@ bb4:
   call void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float> %i10, <4 x i32> zeroinitializer, i32 0, i32 0, i32 0, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/fix-wwm-vgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/fix-wwm-vgpr-copy.ll
index 9b20d9be278c6..5accf7e8a82d0 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-wwm-vgpr-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-wwm-vgpr-copy.ll
@@ -76,11 +76,6 @@ work:
   br i1 %tmp34, label %bb602, label %bb42
 }
 
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
-declare i32 @llvm.amdgcn.wwm.i32(i32) #1
-declare i32 @llvm.amdgcn.strict.wwm.i32(i32) #1
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #2
-
 attributes #0 = { convergent nounwind readnone willreturn }
 attributes #1 = { convergent nounwind readnone speculatable willreturn }
 attributes #2 = { nounwind willreturn memory(argmem: write) }
diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll
index ce4beb8789dc3..1456a697becb3 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f32.ll
@@ -118,6 +118,4 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data
   ret float %ret
 }
 
-declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr, float)
-
 attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll
index afd38136870b8..1f334ab2dc37f 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.f64.ll
@@ -86,6 +86,4 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %da
   ret double %ret
 }
 
-declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr, double)
-
 attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.v2f16.ll
index ebd6f18de19d8..c94189d7df486 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-atomic-fadd.v2f16.ll
@@ -33,5 +33,3 @@ define amdgpu_ps <2 x half> @flat_atomic_fadd_v2f16_rtn_intrinsic(ptr %ptr, <2 x
   %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr %ptr, <2 x half> %data)
   ret <2 x half> %ret
 }
-
-declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr, <2 x half>)
diff --git a/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir b/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir
index 962d49df8509e..f996e66ca6300 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir
+++ b/llvm/test/CodeGen/AMDGPU/flat-load-clustering.mir
@@ -19,8 +19,6 @@
     ret void
   }
 
-  declare i32 @llvm.amdgcn.workitem.id.x()
-
 ...
 ---
 name:            flat_load_clustering
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
index 0af57c6a97db5..3dcd5b3bb2805 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch-svs.ll
@@ -9,8 +9,6 @@
 ; Test flat scratch SVS addressing mode with various combinations of alignment
 ; of soffset, voffset and inst_offset.
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 define amdgpu_kernel void @soff1_voff1(i32 %soff) {
 ; GFX940-SDAG-LABEL: soff1_voff1:
 ; GFX940-SDAG:       ; %bb.0: ; %bb
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index 850be72f06c7d..783c4909f41d0 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -4892,6 +4892,3 @@ bb:
   call void asm sideeffect "; use $0", "s"(ptr addrspace(5) %alloca2) #0
   ret void
 }
-
-declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture writeonly, i8, i64, i1 immarg)
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/floor.ll b/llvm/test/CodeGen/AMDGPU/floor.ll
index cc23289b20b0a..ad8af1ee93d29 100644
--- a/llvm/test/CodeGen/AMDGPU/floor.ll
+++ b/llvm/test/CodeGen/AMDGPU/floor.ll
@@ -10,5 +10,4 @@ define amdgpu_ps void @test(<4 x float> inreg %reg0) {
 }
 
 declare float @floorf(float) readonly
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
 
diff --git a/llvm/test/CodeGen/AMDGPU/fma-combine.ll b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
index bac2d8b8b40c2..a9d46db9c7253 100644
--- a/llvm/test/CodeGen/AMDGPU/fma-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
@@ -11,12 +11,6 @@
 ; beneficial even without fp32 denormals, but they do require no-infs-fp-math
 ; for correctness.
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare double @llvm.fabs.f64(double) #0
-declare double @llvm.fma.f64(double, double, double) #0
-declare float @llvm.fma.f32(float, float, float) #0
-declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
-
 ; (fadd (fmul x, y), z) -> (fma x, y, z)
 define amdgpu_kernel void @combine_to_fma_f64_0(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #1 {
 ; SI-LABEL: combine_to_fma_f64_0:
diff --git a/llvm/test/CodeGen/AMDGPU/fma.f16.ll b/llvm/test/CodeGen/AMDGPU/fma.f16.ll
index 005e40159f61b..ff32988b76a0d 100644
--- a/llvm/test/CodeGen/AMDGPU/fma.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma.f16.ll
@@ -6,9 +6,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-SDAG
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL
 
-declare half @llvm.fma.f16(half, half, half)
-declare half @llvm.maxnum.f16(half, half)
-
 define half @test_fma(half %x, half %y, half %z) {
 ; GFX9-LABEL: test_fma:
 ; GFX9:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fma.f64.ll b/llvm/test/CodeGen/AMDGPU/fma.f64.ll
index 99f7d4da685d6..1d9b2cb47af2a 100644
--- a/llvm/test/CodeGen/AMDGPU/fma.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma.f64.ll
@@ -3,11 +3,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx90a -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,GFX90A %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=FUNC,GCN,SIGFX11 %s
 
-declare double @llvm.fma.f64(double, double, double) nounwind readnone
-declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-declare <4 x double> @llvm.fma.v4f64(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
-declare double @llvm.fabs.f64(double) nounwind readnone
-
 ; FUNC-LABEL: {{^}}fma_f64:
 ; SIGFX11: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
 ; GFX90A: v_fmac_f64_e32 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}
diff --git a/llvm/test/CodeGen/AMDGPU/fma.ll b/llvm/test/CodeGen/AMDGPU/fma.ll
index 93ed64d93b8ba..77134af53b341 100644
--- a/llvm/test/CodeGen/AMDGPU/fma.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma.ll
@@ -9,12 +9,6 @@
 ; RUN:  not llc -amdgpu-scalarize-global-loads=false  -mtriple=r600 -mcpu=caicos -verify-machineinstrs < %s
 ; RUN:  not llc -amdgpu-scalarize-global-loads=false  -mtriple=r600 -mcpu=turks -verify-machineinstrs < %s
 
-declare float @llvm.fma.f32(float, float, float) nounwind readnone
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
-declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 ; FUNC-LABEL: {{^}}fma_f32:
 ; SI: v_fma_f32 {{v[0-9]+, v[0-9]+, v[0-9]+, v[0-9]+}}
 ; GFX906: v_fmac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
diff --git a/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll b/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
index 4f49d4ce3a040..b642086c15cbf 100644
--- a/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmac.sdwa.ll
@@ -93,7 +93,5 @@ bb:
   ret <4 x half> %tmp11
 }
 
-declare half @llvm.fmuladd.f16(half, half, half)
-
 attributes #0 = { convergent nounwind readonly}
 attributes #1 = { norecurse nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmad.ll b/llvm/test/CodeGen/AMDGPU/fmad.ll
index a1d62228fd4b6..8c490be9c1ffc 100644
--- a/llvm/test/CodeGen/AMDGPU/fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmad.ll
@@ -14,4 +14,3 @@ define amdgpu_ps void @test(<4 x float> inreg %reg0) {
 }
 
 declare float @fabs(float ) readnone
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/fmax.ll b/llvm/test/CodeGen/AMDGPU/fmax.ll
index 825ad93b896c5..555167988c216 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax.ll
@@ -11,5 +11,3 @@ define amdgpu_ps void @test(<4 x float> inreg %reg0) {
    call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
    ret void
 }
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/fmax3.f64.ll b/llvm/test/CodeGen/AMDGPU/fmax3.f64.ll
index 83a49445667b3..24482aa7f51fd 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax3.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax3.f64.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-declare double @llvm.maxnum.f64(double, double) nounwind readnone
-
 ; SI-LABEL: {{^}}test_fmax3_f64:
 ; SI: buffer_load_dwordx2 [[REGA:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+:[0-9]+}}], 0 glc{{$}}
 ; SI: buffer_load_dwordx2 [[REGB:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:8
diff --git a/llvm/test/CodeGen/AMDGPU/fmax3.ll b/llvm/test/CodeGen/AMDGPU/fmax3.ll
index 23eb73038917d..e5e2a1b8a77bd 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax3.ll
@@ -609,11 +609,6 @@ entry:
   ret <2 x half> %res
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare half @llvm.maxnum.f16(half, half) #1
-declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { nounwind "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll
index 01b2f207388e8..92fe4c971007a 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.f64.ll
@@ -212,7 +212,5 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_f64(ptr addrspace(1) %out, ptr a
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
index 1da621cb9f09d..1ee359f559baf 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
@@ -6,8 +6,6 @@
 
 ; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope --check-prefixes=EG,FUNC %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 ; FUNC-LABEL: {{^}}test_fmax_legacy_uge_f32:
 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum.ll b/llvm/test/CodeGen/AMDGPU/fmaximum.ll
index dd685a6169d84..7f7deae12d073 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum.ll
@@ -293,19 +293,6 @@ define amdgpu_kernel void @fmaximum_f16_move_to_valu(ptr addrspace(1) %out, ptr
   store half %v, ptr addrspace(1) %out, align 4
   ret void
 }
-
-declare float @llvm.maximum.f32(float, float)
-declare <2 x float> @llvm.maximum.v2f32(<2 x float>, <2 x float>)
-declare <3 x float> @llvm.maximum.v3f32(<3 x float>, <3 x float>)
-declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
-declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)
-declare half @llvm.maximum.f16(half, half)
-declare <2 x half> @llvm.maximum.v2f16(<2 x half>, <2 x half>)
-declare <3 x half> @llvm.maximum.v3f16(<3 x half>, <3 x half>)
-declare <4 x half> @llvm.maximum.v4f16(<4 x half>, <4 x half>)
-declare double @llvm.maximum.f64(double, double)
-declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
-declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>)
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX12-GISEL: {{.*}}
 ; GFX12-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
index 6e45084dc4b80..3198dd9f8ae9e 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaximum3.ll
@@ -91,8 +91,3 @@ define amdgpu_kernel void @no_fmaximum3_olt_0_f64(ptr addrspace(1) %out, ptr add
   store double %f1, ptr addrspace(1) %out, align 4
   ret void
 }
-
-declare double @llvm.maximum.f64(double, double)
-declare float @llvm.maximum.f32(float, float)
-declare half @llvm.maximum.f16(half, half)
-declare <2 x half> @llvm.maximum.v2f16(<2 x half>, <2 x half>)
diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.f64.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.f64.ll
index 34e1cb89607b9..04726c0e97156 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.f64.ll
@@ -1,12 +1,6 @@
 ; RUN: llc -mtriple=amdgcn < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-declare double @llvm.maxnum.f64(double, double) #0
-declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) #0
-declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>) #0
-declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>) #0
-declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>) #0
-
 ; FUNC-LABEL: @test_fmax_f64
 ; SI: v_max_f64
 define amdgpu_kernel void @test_fmax_f64(ptr addrspace(1) %out, double %a, double %b) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
index 38640a18b5aee..00d78a2fab7e1 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.ll
@@ -210,13 +210,5 @@ define <3 x float> @test_func_fmax_v3f32(<3 x float> %a, <3 x float> %b) #0 {
   ret <3 x float> %val
 }
 
-declare float @llvm.maxnum.f32(float, float) #1
-declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
-declare <3 x float> @llvm.maxnum.v3f32(<3 x float>, <3 x float>) #1
-declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1
-declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #1
-declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #1
-declare double @llvm.maxnum.f64(double, double)
-
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
index f5c55ffef30a1..1ca64f5c7e710 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
@@ -192,12 +192,5 @@ define amdgpu_kernel void @fmax_literal_var_f32(ptr addrspace(1) %out, float %a)
   ret void
 }
 
-declare float @llvm.maxnum.f32(float, float) #1
-declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
-declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1
-declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) #1
-declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) #1
-declare double @llvm.maxnum.f64(double, double)
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll b/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll
index 99b163dc9753b..d66895466ba96 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3-cast-combine.ll
@@ -11,11 +11,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
 ; RUN: llc -global-isel=1 -global-isel-abort=2 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
 
-
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #0
-declare float @llvm.fabs.f32(float) #0
-declare half @llvm.fabs.f16(half) #0
-
 define half @fmed3_f32_fpext_f16(half %arg0, half %arg1, half %arg2) #1 {
 ; GFX7-SDAG-LABEL: fmed3_f32_fpext_f16:
 ; GFX7-SDAG:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fmed3.ll b/llvm/test/CodeGen/AMDGPU/fmed3.ll
index 764fb992d4d34..d40757cb4c006 100644
--- a/llvm/test/CodeGen/AMDGPU/fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmed3.ll
@@ -8250,16 +8250,6 @@ define amdgpu_kernel void @two_non_inline_constant_multi_use(ptr addrspace(1) %o
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.minnum.f32(float, float) #0
-declare float @llvm.maxnum.f32(float, float) #0
-declare double @llvm.minnum.f64(double, double) #0
-declare double @llvm.maxnum.f64(double, double) #0
-declare half @llvm.fabs.f16(half) #0
-declare half @llvm.minnum.f16(half, half) #0
-declare half @llvm.maxnum.f16(half, half) #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="false" }
 attributes #2 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fmin.ll b/llvm/test/CodeGen/AMDGPU/fmin.ll
index e7cb2d7532483..1ceae7a4d7cbd 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin.ll
@@ -11,5 +11,3 @@ define amdgpu_ps void @test(<4 x float> inreg %reg0) {
    call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
    ret void
 }
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/fmin3.ll b/llvm/test/CodeGen/AMDGPU/fmin3.ll
index 7337d90b4bea6..63a82e9556315 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin3.ll
@@ -902,12 +902,6 @@ define amdgpu_kernel void @test_fmin3_olt_1_f64(ptr addrspace(1) %out, ptr addrs
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare double @llvm.minnum.f64(double, double) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare half @llvm.minnum.f16(half, half) #1
-declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { nounwind "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f64.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f64.ll
index d20c39d510364..921c442b4214e 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.f64.ll
@@ -418,7 +418,5 @@ define amdgpu_kernel void @test_fmin_legacy_olt_f64(ptr addrspace(1) %out, ptr a
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
index 8e595a827c78d..5dfde839ad512 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
@@ -6,8 +6,6 @@
 
 ; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope --check-prefixes=EG,FUNC %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 ; The two inputs to the instruction are different SGPRs from the same
 ; super register, so we can't fold both SGPR operands even though they
 ; are both the same register.
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum.ll b/llvm/test/CodeGen/AMDGPU/fminimum.ll
index 2b3cc4fd73858..cdf93b1c9e37a 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum.ll
@@ -293,19 +293,6 @@ define amdgpu_kernel void @fminimum_f16_move_to_valu(ptr addrspace(1) %out, ptr
   store half %v, ptr addrspace(1) %out, align 4
   ret void
 }
-
-declare float @llvm.minimum.f32(float, float)
-declare <2 x float> @llvm.minimum.v2f32(<2 x float>, <2 x float>)
-declare <3 x float> @llvm.minimum.v3f32(<3 x float>, <3 x float>)
-declare <4 x float> @llvm.minimum.v4f32(<4 x float>, <4 x float>)
-declare <16 x float> @llvm.minimum.v16f32(<16 x float>, <16 x float>)
-declare half @llvm.minimum.f16(half, half)
-declare <2 x half> @llvm.minimum.v2f16(<2 x half>, <2 x half>)
-declare <3 x half> @llvm.minimum.v3f16(<3 x half>, <3 x half>)
-declare <4 x half> @llvm.minimum.v4f16(<4 x half>, <4 x half>)
-declare double @llvm.minimum.f64(double, double)
-declare <2 x double> @llvm.minimum.v2f64(<2 x double>, <2 x double>)
-declare <4 x double> @llvm.minimum.v4f64(<4 x double>, <4 x double>)
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX12-GISEL: {{.*}}
 ; GFX12-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/fminimum3.ll b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
index eef271e69a384..7469810745533 100644
--- a/llvm/test/CodeGen/AMDGPU/fminimum3.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminimum3.ll
@@ -91,8 +91,3 @@ define amdgpu_kernel void @no_fminimum3_olt_0_f64(ptr addrspace(1) %out, ptr add
   store double %f1, ptr addrspace(1) %out, align 4
   ret void
 }
-
-declare double @llvm.minimum.f64(double, double)
-declare float @llvm.minimum.f32(float, float)
-declare half @llvm.minimum.f16(half, half)
-declare <2 x half> @llvm.minimum.v2f16(<2 x half>, <2 x half>)
diff --git a/llvm/test/CodeGen/AMDGPU/fminnum.f64.ll b/llvm/test/CodeGen/AMDGPU/fminnum.f64.ll
index 077b76b8cb281..97dded01cc056 100644
--- a/llvm/test/CodeGen/AMDGPU/fminnum.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminnum.f64.ll
@@ -2,12 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX678 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
 
-declare double @llvm.minnum.f64(double, double) #0
-declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0
-declare <4 x double> @llvm.minnum.v4f64(<4 x double>, <4 x double>) #0
-declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>) #0
-declare <16 x double> @llvm.minnum.v16f64(<16 x double>, <16 x double>) #0
-
 ; GCN-LABEL: {{^}}test_fmin_f64_ieee_noflush:
 ; GCN: s_load_dwordx2 [[A:s\[[0-9]+:[0-9]+\]]]
 ; GCN: s_load_dwordx2 [[B:s\[[0-9]+:[0-9]+\]]]
diff --git a/llvm/test/CodeGen/AMDGPU/fminnum.ll b/llvm/test/CodeGen/AMDGPU/fminnum.ll
index 65b311845a6b7..5287fc9aa73d0 100644
--- a/llvm/test/CodeGen/AMDGPU/fminnum.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminnum.ll
@@ -218,12 +218,5 @@ define <3 x float> @test_func_fmin_v3f32(<3 x float> %a, <3 x float> %b) nounwin
   ret <3 x float> %val
 }
 
-declare float @llvm.minnum.f32(float, float) #1
-declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
-declare <3 x float> @llvm.minnum.v3f32(<3 x float>, <3 x float>) #1
-declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
-declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #1
-declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #1
-
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll b/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
index 22ccb718a3d28..ffb0097b32a65 100644
--- a/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
@@ -192,11 +192,5 @@ define amdgpu_kernel void @fmin_literal_var_f32(ptr addrspace(1) %out, float %a)
   ret void
 }
 
-declare float @llvm.minnum.f32(float, float) #1
-declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
-declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
-declare <8 x float> @llvm.minnum.v8f32(<8 x float>, <8 x float>) #1
-declare <16 x float> @llvm.minnum.v16f32(<16 x float>, <16 x float>) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
index 7830c91851bfa..ff82757e5b56d 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@@ -9,9 +9,6 @@
 ; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
 ; make add an instruction if the fadd has more than one use.
 
-declare half @llvm.fabs.f16(half) #1
-declare float @llvm.fabs.f32(float) #1
-
 define amdgpu_kernel void @multiple_fadd_use_test_f32(ptr addrspace(1) %out, float %x, float %y, float %z) #0 {
 ; VI-LABEL: multiple_fadd_use_test_f32:
 ; VI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
index 711a5fff1a063..40ca18c323111 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
@@ -8,14 +8,6 @@
 
 ; Test fmul by power of 2 which is better emitted as ldexp
 
-declare half @llvm.fabs.f16(half)
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
-declare float @llvm.fabs.f32(float)
-declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
-declare double @llvm.fabs.f64(double)
-declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
-declare i32 @llvm.amdgcn.readfirstlane(i32)
-
 define float @v_mul_42_f32(float %x) {
 ; GCN-LABEL: v_mul_42_f32:
 ; GCN:       ; %bb.0:
@@ -7246,8 +7238,4 @@ define double @v_mul_fabs_8_f64(double %x) {
   ret double %mul
 }
 
-declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata)
-declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
-declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
-
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
index 718be90eb75fc..658c9da971800 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll
@@ -15,10 +15,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=on -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-DENORM,GFX11-DENORM-STRICT %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -denormal-fp-math=ieee -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-DENORM,GFX11-DENORM-CONTRACT %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare half @llvm.fmuladd.f16(half, half, half) #1
-declare half @llvm.fabs.f16(half) #1
-
 define amdgpu_kernel void @fmuladd_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
 ; VI-FLUSH-LABEL: fmuladd_f16:
 ; VI-FLUSH:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
index 945973b277289..aab8f1ee12018 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll
@@ -24,12 +24,6 @@
 
 target triple = "amdgcn--"
 
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare half @llvm.fmuladd.f16(half, half, half) #1
-declare float @llvm.fabs.f32(float) #1
-
 ; GCN-LABEL: {{^}}fmuladd_f32:
 ; GCN-FLUSH-MAD: v_mac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
 ; GCN-FLUSH-FMAC: v_fmac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
index ff89cc21b56da..9d1a114b7b14c 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
@@ -189,8 +189,5 @@ define amdgpu_kernel void @fadd_a_a_b_f64_fast(ptr addrspace(1) %out,
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare double @llvm.fmuladd.f64(double, double, double) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
index 0c40fe0215b5d..06d9d80fc7ee4 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.v2f16.ll
@@ -8,10 +8,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9-DENORM-CONTRACT,GFX9-DENORM %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9-DENORM-CONTRACT,GFX9-DENORM %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #1
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
-
 ; GCN-LABEL: {{^}}fmuladd_v2f16:
 ; GFX9-FLUSH: v_pk_mul_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
 ; GFX9-FLUSH: v_pk_add_f16 {{v[0-9]+, v[0-9]+, v[0-9]+}}
diff --git a/llvm/test/CodeGen/AMDGPU/fnearbyint.ll b/llvm/test/CodeGen/AMDGPU/fnearbyint.ll
index f411a76e75ab6..5e33fd9f3227f 100644
--- a/llvm/test/CodeGen/AMDGPU/fnearbyint.ll
+++ b/llvm/test/CodeGen/AMDGPU/fnearbyint.ll
@@ -4,14 +4,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
 
-declare half @llvm.nearbyint.f16(half) #0
-declare float @llvm.nearbyint.f32(float) #0
-declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) #0
-declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) #0
-declare double @llvm.nearbyint.f64(double) #0
-declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) #0
-declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) #0
-
 define amdgpu_kernel void @fnearbyint_f16(ptr addrspace(1) %out, half %in) #1 {
 ; SI-LABEL: fnearbyint_f16:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines-gfx1200.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines-gfx1200.ll
index f90b79c472ab7..80ee959aea164 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines-gfx1200.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines-gfx1200.ll
@@ -264,7 +264,3 @@ define void @v_fneg_maximum_multi_use_maximum_f32(ptr addrspace(1) %out, ptr add
   store volatile float %use1, ptr addrspace(1) %out
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare float @llvm.minimum.f32(float, float)
-declare float @llvm.maximum.f32(float, float)
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index b32630a97b3ad..d638b1977de5c 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -4741,21 +4741,6 @@ define <2 x half> @fadd_select_fneg_fneg_v2f16(i32 %arg0, <2 x half> %x, <2 x ha
   ret <2 x half> %add
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare half @llvm.sin.f16(half) #1
-declare half @llvm.trunc.f16(half) #1
-declare half @llvm.round.f16(half) #1
-declare half @llvm.rint.f16(half) #1
-declare half @llvm.nearbyint.f16(half) #1
-declare half @llvm.roundeven.f16(half) #1
-declare half @llvm.canonicalize.f16(half) #1
-declare half @llvm.minnum.f16(half, half) #1
-declare half @llvm.maxnum.f16(half, half) #1
-declare half @llvm.fma.f16(half, half, half) #1
-declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>)
-declare half @llvm.fmuladd.f16(half, half, half) #1
-declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) #1
-
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll
index d8bbda19453fa..09982ca17d462 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll
@@ -151,12 +151,6 @@ define half @v_fneg_arithmetic_fence_fmul_f16(half %a, half %b) #0 {
   ret half %fneg
 }
 
-declare half @llvm.amdgcn.rcp.f16(half) #1
-declare half @llvm.amdgcn.sin.f16(half) #1
-declare half @llvm.arithmetic.fence.f16(half) #1
-declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32) #0
-declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32) #0
-
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
index 0cb4b8c960bbf..7923be02d79d0 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -2811,30 +2811,6 @@ define amdgpu_kernel void @v_fneg_negk_select_f32(ptr addrspace(1) %out, ptr add
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fma.f32(float, float, float) #1
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
-declare float @llvm.sin.f32(float) #1
-declare float @llvm.trunc.f32(float) #1
-declare float @llvm.round.f32(float) #1
-declare float @llvm.rint.f32(float) #1
-declare float @llvm.nearbyint.f32(float) #1
-declare float @llvm.canonicalize.f32(float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare half @llvm.minnum.f16(half, half) #1
-declare double @llvm.minnum.f64(double, double) #1
-declare double @llvm.fma.f64(double, double, double) #1
-
-declare float @llvm.amdgcn.sin.f32(float) #1
-declare float @llvm.amdgcn.rcp.f32(float) #1
-declare float @llvm.amdgcn.rcp.legacy(float) #1
-declare float @llvm.amdgcn.fmul.legacy(float, float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
-
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index b5440b9c38c9f..2096f91043ed9 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -3397,47 +3397,6 @@ bb:
   ret float %i1
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fma.f32(float, float, float) #1
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.sin.f32(float) #1
-declare float @llvm.trunc.f32(float) #1
-declare float @llvm.round.f32(float) #1
-declare float @llvm.rint.f32(float) #1
-declare float @llvm.nearbyint.f32(float) #1
-declare float @llvm.roundeven.f32(float) #1
-declare float @llvm.canonicalize.f32(float) #1
-declare float @llvm.arithmetic.fence.f32(float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare double @llvm.minnum.f64(double, double) #1
-declare double @llvm.fma.f64(double, double, double) #1
-
-declare float @llvm.amdgcn.sin.f32(float) #1
-declare float @llvm.amdgcn.rcp.f32(float) #1
-declare float @llvm.amdgcn.rcp.legacy(float) #1
-declare float @llvm.amdgcn.fmul.legacy(float, float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
-
-declare half @llvm.fma.f16(half, half, half) #1
-declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>)
-declare half @llvm.fmuladd.f16(half, half, half) #1
-declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) #1
-declare half @llvm.sin.f16(half) #1
-declare half @llvm.trunc.f16(half) #1
-declare half @llvm.round.f16(half) #1
-declare half @llvm.rint.f16(half) #1
-declare half @llvm.nearbyint.f16(half) #1
-declare half @llvm.canonicalize.f16(half) #1
-declare half @llvm.minnum.f16(half, half) #1
-declare half @llvm.maxnum.f16(half, half) #1
-declare half @llvm.amdgcn.sin.f16(half) #1
-declare half @llvm.amdgcn.rcp.f16(half) #1
-
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.si.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.si.ll
index 0ad61066b4b21..e04515db20809 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.si.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.si.ll
@@ -21,8 +21,5 @@ define amdgpu_kernel void @v_fneg_rcp_legacy_f32(ptr addrspace(1) %out, ptr addr
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.amdgcn.rcp.legacy(float) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs-divergence-driven-isel.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs-divergence-driven-isel.ll
index 07f315ea6cb48..74cdf4d1ee6bd 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs-divergence-driven-isel.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs-divergence-driven-isel.ll
@@ -494,11 +494,3 @@ define amdgpu_kernel void @uniform_fneg_fabs_f64(ptr addrspace(1) %out, ptr addr
   store double %fneg, ptr addrspace(1) %out.gep
   ret void
 }
-
-declare float @llvm.fabs.f32(float)
-declare half @llvm.fabs.f16(half)
-declare double @llvm.fabs.f64(double)
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
-declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
index 4364b32e62f8c..b931a46b7518d 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f16.ll
@@ -689,9 +689,5 @@ define amdgpu_kernel void @s_fneg_multi_use_fabs_foldable_neg_v2f16(ptr addrspac
   ret void
 }
 
-declare half @llvm.fabs.f16(half) #1
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
-declare <4 x half> @llvm.fabs.v4f16(<4 x half>) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
index 5f1d232daabe5..6a47e9a55581b 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.f64.ll
@@ -93,6 +93,3 @@ define amdgpu_kernel void @fneg_fabs_v4f64(ptr addrspace(1) %out, <4 x double> %
 }
 
 declare double @fabs(double) readnone
-declare double @llvm.fabs.f64(double) readnone
-declare <2 x double> @llvm.fabs.v2f64(<2 x double>) readnone
-declare <4 x double> @llvm.fabs.v4f64(<4 x double>) readnone
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
index b0c17828cb13b..69da3dca7f20a 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-fabs.ll
@@ -106,9 +106,6 @@ define amdgpu_kernel void @fneg_fabsf_v4f32(ptr addrspace(1) %out, <4 x float> %
 }
 
 declare float @fabsf(float) readnone
-declare float @llvm.fabs.f32(float) readnone
-declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
index cd1ec85eb8d0f..f2ea4c5e72675 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll
@@ -1597,7 +1597,4 @@ bb5:                                              ; preds = %bb, %.entry
   ret void
 }
 
-
-declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg) #0
-
 attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/fneg.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
index 31c1389c94020..bb1f09e35e699 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
@@ -671,7 +671,5 @@ define amdgpu_kernel void @v_extract_fneg_no_fold_v2f16(ptr addrspace(1) %in) #0
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fold-fabs.ll b/llvm/test/CodeGen/AMDGPU/fold-fabs.ll
index bb2bad9d3521a..aa0f884740675 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-fabs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fold-fabs.ll
@@ -233,6 +233,3 @@ exit:
   %ret.2 = fmul reassoc nnan nsz arcp contract afn float %2, %ret
   ret float %ret.2
 }
-
-declare float @llvm.fabs.f32(float)
-declare float @llvm.fmuladd.f32(float, float, float) #0
diff --git a/llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll b/llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll
index 4edf4c44f651c..9aeb34d8a5d3b 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/fold-fmaak-bug.ll
@@ -1,8 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix GFX10
 
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float)
-declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg)
-
 ; Check that this constant is not folded into the v_fmaak_f32 instruction.
 ; GFX10-LABEL: _amdgpu_ps_main:
 ; GFX10: v_mov_b32_e32 v1, 0x40490fdb
diff --git a/llvm/test/CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll b/llvm/test/CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll
index 8401e04d27233..26b22acb609b6 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/fold-fmul-to-neg-abs.ll
@@ -32,6 +32,4 @@ define amdgpu_kernel void @fold_mul_abs(ptr addrspace(1) %arg) {
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll
index 7c5f6d5e33efe..48a62fc6bced0 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/fold-int-pow2-with-fmul-or-fdiv.ll
@@ -3,11 +3,6 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
 
-declare i16 @llvm.umax.i16(i16, i16)
-declare i64 @llvm.umin.i64(i64, i64)
-
-declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>)
-
 define <4 x float> @fmul_pow2_4xfloat(<4 x i32> %i) {
 ; VI-LABEL: fmul_pow2_4xfloat:
 ; VI:       ; %bb.0:
@@ -145,8 +140,6 @@ define <4 x float> @fdiv_pow2_4xfloat(<4 x i32> %i) {
   ret <4 x float> %r
 }
 
-declare <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half>, <8 x i16>)
-
 define <8 x half> @fmul_pow2_8xhalf(<8 x i16> %i) {
 ; VI-LABEL: fmul_pow2_8xhalf:
 ; VI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/force-store-sc0-sc1.ll b/llvm/test/CodeGen/AMDGPU/force-store-sc0-sc1.ll
index b35de03203004..41fc2d18b9960 100644
--- a/llvm/test/CodeGen/AMDGPU/force-store-sc0-sc1.ll
+++ b/llvm/test/CodeGen/AMDGPU/force-store-sc0-sc1.ll
@@ -136,6 +136,3 @@ define amdgpu_kernel void @store_global_atomic_system(ptr addrspace(1) %ptr) {
   store atomic float 1.000000e+00, ptr addrspace(1) %ptr monotonic, align 4
   ret void
 }
-
-
-declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll
index 2f29a1a9aa768..dd544f97a394c 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx1200.ll
@@ -2,17 +2,6 @@
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs | FileCheck %s -check-prefix=GFX12-SDAG
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs | FileCheck %s -check-prefix=GFX12-GISEL
 
-declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
-declare <2 x bfloat> @llvm.amdgcn.struct.buffer.atomic.fadd.v2bf16(<2 x bfloat>, <4 x i32>, i32, i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32)
-declare <2 x bfloat> @llvm.amdgcn.raw.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, <4 x i32> %rsrc, i32, i32, i32)
-declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data)
-declare <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1) %ptr, <2 x i16> %data)
-declare <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32, i32, i1)
-declare <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data)
-declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %ptr, <2 x half> %data)
-declare <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %ptr, <2 x i16> %data)
-
 define amdgpu_kernel void @local_atomic_fadd_v2f16_noret(ptr addrspace(3) %ptr, <2 x half> %data) {
 ; GFX12-SDAG-LABEL: local_atomic_fadd_v2f16_noret:
 ; GFX12-SDAG:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll
index 50a27d42322d7..b589067f6e0bc 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-atomics-gfx940.ll
@@ -1,14 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs | FileCheck %s -check-prefix=GFX940
 
-declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data)
-declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p0.v2f16(ptr %ptr, <2 x half> %data)
-
 ; bf16 atomics use v2i16 argument since there is no bf16 data type in the llvm.
-declare <2 x i16> @llvm.amdgcn.flat.atomic.fadd.v2bf16.p0(ptr %ptr, <2 x i16> %data)
-declare <2 x i16> @llvm.amdgcn.global.atomic.fadd.v2bf16.p1(ptr addrspace(1) %ptr, <2 x i16> %data)
-declare <2 x half> @llvm.amdgcn.ds.fadd.v2f16(ptr addrspace(3) %ptr, <2 x half> %data, i32, i32, i1)
-declare <2 x i16> @llvm.amdgcn.ds.fadd.v2bf16(ptr addrspace(3) %ptr, <2 x i16> %data)
 
 define amdgpu_kernel void @flat_atomic_fadd_f32_noret(ptr %ptr, float %data) {
 ; GFX940-LABEL: flat_atomic_fadd_f32_noret:
diff --git a/llvm/test/CodeGen/AMDGPU/fp-classify.ll b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
index 18d2e52e8f900..5c2a8c9a55a78 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-classify.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-classify.ll
@@ -3,9 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
 
-declare float @llvm.fabs.f32(float) #1
-declare double @llvm.fabs.f64(double) #1
-
 define amdgpu_kernel void @test_isinf_pattern(ptr addrspace(1) nocapture %out, float %x) #0 {
 ; SI-LABEL: test_isinf_pattern:
 ; SI:       ; %bb.0:
@@ -772,7 +769,5 @@ define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocaptur
   ret void
 }
 
-declare half @llvm.fabs.f16(half) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll
index 0c62b52eb92af..811b717fb7e8c 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll
@@ -11,9 +11,6 @@
 ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030
 ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1100
 
-declare float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float, <4 x i32>, i32, i32, i32 immarg)
-declare float @llvm.amdgcn.raw.buffer.atomic.fmax.f32(float, <4 x i32>, i32, i32, i32 immarg)
-
 
 define amdgpu_kernel void @raw_buffer_atomic_min_noret_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
 ; SI-LABEL: raw_buffer_atomic_min_noret_f32:
diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-ptr-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-ptr-atomics.ll
index e3ed0fa491884..10b07d8a092ae 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-ptr-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-ptr-atomics.ll
@@ -11,9 +11,6 @@
 ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030
 ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1100
 
-declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-
 
 define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f32(ptr addrspace(8) inreg %rsrc, float %data, i32 %vindex) {
 ; SI-LABEL: raw_ptr_buffer_atomic_min_noret_f32:
diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics-f64.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics-f64.ll
index 8633a3965259b..067f8d840e838 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics-f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics-f64.ll
@@ -2,9 +2,6 @@
 ; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX10,GFX10-SDAG
 ; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL
 
-declare double @llvm.amdgcn.flat.atomic.fmin.f64.p1.f64(ptr %ptr, double %data)
-declare double @llvm.amdgcn.flat.atomic.fmax.f64.p1.f64(ptr %ptr, double %data)
-
 define amdgpu_cs void @flat_atomic_fmin_f64_noret(ptr %ptr, double %data) {
 ; GFX10-LABEL: flat_atomic_fmin_f64_noret:
 ; GFX10:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll
index 1d2e3fc636f44..730c8f93d8328 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-flat-atomics.ll
@@ -4,9 +4,6 @@
 ; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL
 ; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL
 
-declare float @llvm.amdgcn.flat.atomic.fmin.f32.p1.f32(ptr %ptr, float %data)
-declare float @llvm.amdgcn.flat.atomic.fmax.f32.p1.f32(ptr %ptr, float %data)
-
 define amdgpu_cs void @flat_atomic_fmin_f32_noret(ptr %ptr, float %data) {
 ; GFX10-LABEL: flat_atomic_fmin_f32_noret:
 ; GFX10:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics-f64.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics-f64.ll
index bb06ee3165e3a..94918870f3173 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics-f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics-f64.ll
@@ -2,9 +2,6 @@
 ; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX10,GFX10-SDAG
 ; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL
 
-declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
-declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
-
 define amdgpu_cs void @global_atomic_fmin_f64_noret(ptr addrspace(1) %ptr, double %data) {
 ; GFX10-LABEL: global_atomic_fmin_f64_noret:
 ; GFX10:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics.ll
index 699bb8b41b69d..4e93de1e33e6b 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-global-atomics.ll
@@ -4,9 +4,6 @@
 ; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL
 ; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL
 
-declare float @llvm.amdgcn.global.atomic.fmin.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
-declare float @llvm.amdgcn.global.atomic.fmax.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
-
 define amdgpu_cs void @global_atomic_fmin_f32_noret(ptr addrspace(1) %ptr, float %data) {
 ; GFX10-LABEL: global_atomic_fmin_f32_noret:
 ; GFX10:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-image-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-image-atomics.ll
index 71b0abe183396..6bfb7e8d453e8 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-min-max-image-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-image-atomics.ll
@@ -14,9 +14,6 @@
 ; GFX11-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.image.atomic.f
 ; G_GFX11-ERR: LLVM ERROR: cannot select: {{.*}} = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.atomic.f
 
-declare float @llvm.amdgcn.image.atomic.fmin.1d.f32.f32(float, i32, <8 x i32>, i32, i32)
-declare float @llvm.amdgcn.image.atomic.fmax.1d.f32.f32(float, i32, <8 x i32>, i32, i32)
-
 
 define amdgpu_ps float @atomic_fmin_1d(<8 x i32> inreg %rsrc, float %data, i32 %s) {
 ; SI-LABEL: atomic_fmin_1d:
diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-num-flat-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-num-flat-atomics.ll
index a0119a22317a6..8dba1034df1de 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-min-max-num-flat-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-num-flat-atomics.ll
@@ -2,9 +2,6 @@
 ; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG
 ; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL
 
-declare float @llvm.amdgcn.flat.atomic.fmin.num.f32.p1.f32(ptr %ptr, float %data)
-declare float @llvm.amdgcn.flat.atomic.fmax.num.f32.p1.f32(ptr %ptr, float %data)
-
 define amdgpu_cs void @flat_atomic_fmin_num_f32_noret(ptr %ptr, float %data) {
 ; GFX12-LABEL: flat_atomic_fmin_num_f32_noret:
 ; GFX12:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fp-min-max-num-global-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp-min-max-num-global-atomics.ll
index abe7a5cc43f4f..809b96e3d1f97 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-min-max-num-global-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-min-max-num-global-atomics.ll
@@ -2,9 +2,6 @@
 ; RUN: llc < %s -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG
 ; RUN: llc < %s -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL
 
-declare float @llvm.amdgcn.global.atomic.fmin.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
-declare float @llvm.amdgcn.global.atomic.fmax.num.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
-
 define amdgpu_cs void @global_atomic_fmin_num_f32_noret(ptr addrspace(1) %ptr, float %data) {
 ; GFX12-LABEL: global_atomic_fmin_num_f32_noret:
 ; GFX12:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll b/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll
index d827ea0503a3b..595ba8f608693 100644
--- a/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp16_to_fp32.ll
@@ -5,8 +5,6 @@
 ; RUN: llc -mtriple=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=CYPRESS %s
 ; RUN: llc -mtriple=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefixes=CAYMAN %s
 
-declare float @llvm.convert.from.fp16.f32(i16) nounwind readnone
-
 define amdgpu_kernel void @test_convert_fp16_to_fp32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
 ; GFX6-LABEL: test_convert_fp16_to_fp32:
 ; GFX6:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll b/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll
index 03b8251ea4640..f76e325d45810 100644
--- a/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp16_to_fp64.ll
@@ -3,8 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
 
-declare double @llvm.convert.from.fp16.f64(i16) nounwind readnone
-
 define amdgpu_kernel void @test_convert_fp16_to_fp64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
 ; GFX6-LABEL: test_convert_fp16_to_fp64:
 ; GFX6:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll b/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll
index 8ab82b722445e..158b63964da2c 100644
--- a/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp32_to_fp16.ll
@@ -4,8 +4,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=CYPRESS %s
 
-declare i16 @llvm.convert.to.fp16.f32(float) nounwind readnone
-
 define amdgpu_kernel void @test_convert_fp32_to_fp16(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) nounwind {
 ; GFX6-LABEL: test_convert_fp32_to_fp16:
 ; GFX6:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
index 767d347bcfaa1..d0314ec2c5314 100644
--- a/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
@@ -2,28 +2,6 @@
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s -check-prefix=GFX90A
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs | FileCheck %s -check-prefix=GFX940
 
-declare double @llvm.amdgcn.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i1)
-declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
-declare double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
-declare double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
-declare double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
-declare double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
-declare double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %ptr, double %data)
-declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr %ptr, double %data)
-declare double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %ptr, double %data)
-declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %ptr, double %data)
-declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %ptr, double %data)
-declare double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) nocapture, double, i32, i32, i1)
-
 define amdgpu_kernel void @buffer_atomic_add_noret_f64(<4 x i32> %rsrc, double %data, i32 %vindex) {
 ; GFX90A-LABEL: buffer_atomic_add_noret_f64:
 ; GFX90A:       ; %bb.0: ; %main_body
diff --git a/llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-atomics.ll
index d610091840b95..dd49027705047 100644
--- a/llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-atomics.ll
@@ -9,9 +9,6 @@
 ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10
 ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030
 
-declare double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32 immarg)
-declare double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32 immarg)
-
 
 define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
 ; SI-LABEL: raw_buffer_atomic_min_noret_f64:
diff --git a/llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-ptr-atomics.ll b/llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-ptr-atomics.ll
index 5f501fec24c2e..2f29dcf7f2942 100644
--- a/llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-ptr-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp64-min-max-buffer-ptr-atomics.ll
@@ -9,9 +9,6 @@
 ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX10
 ; RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs | FileCheck %s -check-prefix=G_GFX1030
 
-declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmin.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
-declare double @llvm.amdgcn.raw.ptr.buffer.atomic.fmax.f64(double, ptr addrspace(8), i32, i32, i32 immarg)
-
 
 define amdgpu_kernel void @raw_ptr_buffer_atomic_min_noret_f64(ptr addrspace(8) inreg %rsrc, double %data, i32 %vindex) {
 ; SI-LABEL: raw_ptr_buffer_atomic_min_noret_f64:
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
index 5f76c5400e5ab..c8be516e2710e 100644
--- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.f64.ll
@@ -1,9 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare double @llvm.fabs.f64(double) #1
-
 ; FUNC-LABEL: @fp_to_sint_f64_i32
 ; SI: v_cvt_i32_f64_e32
 define amdgpu_kernel void @fp_to_sint_f64_i32(ptr addrspace(1) %out, double %in) {
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
index 64063f65e288f..16479ee1ab801 100644
--- a/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp_to_sint.ll
@@ -3,8 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s --check-prefixes=VI
 ; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck %s --check-prefixes=EG
 
-declare float @llvm.fabs.f32(float) #1
-
 define amdgpu_kernel void @fp_to_sint_i32(ptr addrspace(1) %out, float %in) {
 ; SI-LABEL: fp_to_sint_i32:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll b/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
index c3b4e6f964db1..a64a502a3cc8f 100644
--- a/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp_to_uint.f64.ll
@@ -1,9 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare double @llvm.fabs.f64(double) #1
-
 ; SI-LABEL: {{^}}fp_to_uint_i32_f64:
 ; SI: v_cvt_u32_f64_e32
 define amdgpu_kernel void @fp_to_uint_i32_f64(ptr addrspace(1) %out, double %in) {
diff --git a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll
index 5170f9c76db23..9854779300b33 100644
--- a/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp_to_uint.ll
@@ -3,8 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefixes=VI
 ; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck  %s -check-prefixes=EG
 
-declare float @llvm.fabs.f32(float) #1
-
 define amdgpu_kernel void @fp_to_uint_f32_to_i32 (ptr addrspace(1) %out, float %in) {
 ; SI-LABEL: fp_to_uint_f32_to_i32:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/fpenv.ll b/llvm/test/CodeGen/AMDGPU/fpenv.ll
index 80d5c9ad0cebf..b0dbf318e644d 100644
--- a/llvm/test/CodeGen/AMDGPU/fpenv.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpenv.ll
@@ -10,10 +10,6 @@
 ; RUN: llc -mtriple=amdgcn -amdgpu-enable-delay-alu=0 -global-isel=0 -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11-SDAG %s
 ; RUN: llc -mtriple=amdgcn -amdgpu-enable-delay-alu=0 -global-isel=1 -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11-ISEL %s
 
-declare i64 @llvm.get.fpenv.i64()
-
-declare void @llvm.set.fpenv.i64(i64)
-
 define i64 @get_fpenv() {
 ; GFX6-SDAG-LABEL: get_fpenv:
 ; GFX6-SDAG:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/fpext-free.ll b/llvm/test/CodeGen/AMDGPU/fpext-free.ll
index 3752100b7dc0f..4529686541b55 100644
--- a/llvm/test/CodeGen/AMDGPU/fpext-free.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpext-free.ll
@@ -674,9 +674,4 @@ entry:
   ret float %add
 }
 
-declare float @llvm.fmuladd.f32(float, float, float) #0
-declare float @llvm.fma.f32(float, float, float) #0
-declare half @llvm.fmuladd.f16(half, half, half) #0
-declare half @llvm.fma.f16(half, half, half) #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll
index 82c25c01b1779..92e4f7e327c90 100644
--- a/llvm/test/CodeGen/AMDGPU/fpext.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpext.f16.ll
@@ -1028,6 +1028,4 @@ entry:
   ret void
 }
 
-declare half @llvm.fabs.f16(half) #1
-
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fpow.ll b/llvm/test/CodeGen/AMDGPU/fpow.ll
index d855707fbe4de..0088c37f71c64 100644
--- a/llvm/test/CodeGen/AMDGPU/fpow.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpow.ll
@@ -1024,13 +1024,3 @@ define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
   %pow = call float @llvm.pow.f32(float %x, float %y)
   ret float %pow
 }
-
-declare half @llvm.pow.f16(half, half)
-declare float @llvm.pow.f32(float, float)
-declare double @llvm.pow.f64(double, double)
-
-declare half @llvm.fabs.f16(half)
-declare float @llvm.fabs.f32(float)
-
-declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
-declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
index 6cc7368eeae61..879431068d721 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -1437,7 +1437,5 @@ entry:
   ret void
 }
 
-declare float @llvm.fabs.f32(float) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll
index e361aa4db2aa9..08bc0c245854e 100644
--- a/llvm/test/CodeGen/AMDGPU/fract-match.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll
@@ -2325,25 +2325,4 @@ entry:
   ret <2 x double> %cond6
 }
 
-declare half @llvm.floor.f16(half) #0
-declare float @llvm.floor.f32(float) #0
-declare double @llvm.floor.f64(double) #0
-declare <2 x double> @llvm.floor.v2f64(<2 x double>) #0
-declare <2 x float> @llvm.floor.v2f32(<2 x float>) #0
-declare <2 x half> @llvm.floor.v2f16(<2 x half>) #0
-declare float @llvm.trunc.f32(float) #0
-declare float @llvm.minnum.f32(float, float) #0
-declare half @llvm.minnum.f16(half, half) #0
-declare double @llvm.minnum.f64(double, double) #0
-declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #0
-declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #0
-declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0
-declare float @llvm.maxnum.f32(float, float) #0
-declare float @llvm.fabs.f32(float) #0
-declare double @llvm.fabs.f64(double) #0
-declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #0
-declare half @llvm.fabs.f16(half) #0
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
-declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0
-
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/fract.f64.ll b/llvm/test/CodeGen/AMDGPU/fract.f64.ll
index 1fae9970fc109..176b4d39dc5fd 100644
--- a/llvm/test/CodeGen/AMDGPU/fract.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract.f64.ll
@@ -5,9 +5,6 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,SI,FUNC %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,CI,FUNC %s
 
-declare double @llvm.fabs.f64(double) #0
-declare double @llvm.floor.f64(double) #0
-
 ; FUNC-LABEL: {{^}}fract_f64:
 ; SI-DAG: v_fract_f64_e32 [[FRC:v\[[0-9]+:[0-9]+\]]], v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
 ; SI-DAG: v_mov_b32_e32 v[[UPLO:[0-9]+]], -1
diff --git a/llvm/test/CodeGen/AMDGPU/fract.ll b/llvm/test/CodeGen/AMDGPU/fract.ll
index bc6ec96c102e4..5c008f8b73701 100644
--- a/llvm/test/CodeGen/AMDGPU/fract.ll
+++ b/llvm/test/CodeGen/AMDGPU/fract.ll
@@ -4,9 +4,6 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck --check-prefix=GCN %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck --check-prefix=GCN %s
 
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.floor.f32(float) #0
-
 ; GCN-LABEL: {{^}}fract_f32:
 ; GCN: v_floor_f32_e32 [[FLR:v[0-9]+]], [[INPUT:v[0-9]+]]
 ; GCN: v_sub_f32_e32 [[RESULT:v[0-9]+]], [[INPUT]], [[FLR]]
diff --git a/llvm/test/CodeGen/AMDGPU/frexp-constant-fold.ll b/llvm/test/CodeGen/AMDGPU/frexp-constant-fold.ll
index daa304ef641a2..fbcfcc56e602b 100644
--- a/llvm/test/CodeGen/AMDGPU/frexp-constant-fold.ll
+++ b/llvm/test/CodeGen/AMDGPU/frexp-constant-fold.ll
@@ -1,10 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
 
-declare { float, i32 } @llvm.frexp.f32.i32(float)
-declare { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float>)
-declare { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float>)
-
 
 define { float, i32 } @frexp_frexp(float %x) {
 ; CHECK-LABEL: frexp_frexp:
diff --git a/llvm/test/CodeGen/AMDGPU/fshl.ll b/llvm/test/CodeGen/AMDGPU/fshl.ll
index 4ea3323a9dbfc..ebdf5c742fa54 100644
--- a/llvm/test/CodeGen/AMDGPU/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/fshl.ll
@@ -6,10 +6,6 @@
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s --check-prefix=GFX10
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s --check-prefix=GFX11
 
-declare i32 @llvm.fshl.i32(i32, i32, i32) nounwind readnone
-declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
-declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
-
 define amdgpu_kernel void @fshl_i32(ptr addrspace(1) %in, i32 %x, i32 %y, i32 %z) {
 ; SI-LABEL: fshl_i32:
 ; SI:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll
index 3118d63788042..4228a454d5f4b 100644
--- a/llvm/test/CodeGen/AMDGPU/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/fshr.ll
@@ -6,19 +6,6 @@
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX10
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s -check-prefixes=GFX11
 
-declare i32 @llvm.fshr.i32(i32, i32, i32)
-declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>)
-declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>)
-declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
-declare i16 @llvm.fshr.i16(i16, i16, i16)
-declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>)
-declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>)
-declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>)
-declare i64 @llvm.fshr.i64(i64, i64, i64)
-declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
-declare i24 @llvm.fshr.i24(i24, i24, i24)
-declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>)
-
 define amdgpu_kernel void @fshr_i32(ptr addrspace(1) %in, i32 %x, i32 %y, i32 %z) {
 ; SI-LABEL: fshr_i32:
 ; SI:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
index 1d86fbc5695b7..77f93b9017d0d 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f32.ll
@@ -4780,15 +4780,6 @@ entry:
   ret void
 }
 
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.sqrt.f32(float) #0
-declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #0
-declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
-declare <3 x float> @llvm.sqrt.v3f32(<3 x float>) #0
-declare i32 @llvm.amdgcn.readfirstlane(i32) #1
-
-declare { float, i32 } @llvm.frexp.f32.i32(float) #0
-
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 attributes #1 = { convergent nounwind willreturn memory(none) }
 attributes #2 = { "approx-func-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
index 932b10f14780b..1b98f61ed5daf 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.f64.ll
@@ -1864,12 +1864,6 @@ define <3 x double> @v_sqrt_v3f64(<3 x double> %x) {
   ret <3 x double> %result
 }
 
-declare double @llvm.fabs.f64(double) #0
-declare double @llvm.sqrt.f64(double) #0
-declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
-declare <3 x double> @llvm.sqrt.v3f64(<3 x double>) #0
-declare i32 @llvm.amdgcn.readfirstlane(i32) #1
-
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 attributes #1 = { convergent nounwind willreturn memory(none) }
 attributes #2 = { "approx-func-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fsqrt.r600.ll b/llvm/test/CodeGen/AMDGPU/fsqrt.r600.ll
index 9f19bcb638cd7..6659eec09be08 100644
--- a/llvm/test/CodeGen/AMDGPU/fsqrt.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsqrt.r600.ll
@@ -234,9 +234,5 @@ define amdgpu_kernel void @recip_sqrt(ptr addrspace(1) %out, float %src) nounwin
   ret void
 }
 
-declare float @llvm.sqrt.f32(float %in) #0
-declare <2 x float> @llvm.sqrt.v2f32(<2 x float> %in) #0
-declare <4 x float> @llvm.sqrt.v4f32(<4 x float> %in) #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
index 85286841cbcac..522bda54762dc 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
@@ -1302,17 +1302,6 @@ define amdgpu_gfx float @fold_f16_fsub_into_fneg_modifier_interp_daz(float %v0,
   ret float %p1_0
 }
 
-declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
-declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
-declare i1 @llvm.is.fpclass.f32(float, i32 immarg)
-declare i1 @llvm.amdgcn.class.f32(float, i32)
-declare i1 @llvm.is.fpclass.f64(double, i32 immarg)
-declare i1 @llvm.amdgcn.class.f64(double, i32)
-declare i1 @llvm.is.fpclass.f16(half, i32 immarg)
-declare i1 @llvm.amdgcn.class.f16(half, i32)
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32)
-declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32)
-
 attributes #0 = { "denormal-fp-math"="ieee,ieee" }
 attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
 attributes #2 = { "denormal-fp-math"="dynamic,dynamic" }
diff --git a/llvm/test/CodeGen/AMDGPU/fsub64.ll b/llvm/test/CodeGen/AMDGPU/fsub64.ll
index dd2c8746ca81e..4a9bf32d59c27 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub64.ll
@@ -1,8 +1,6 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-declare double @llvm.fabs.f64(double) #0
-
 ; SI-LABEL: {{^}}fsub_f64:
 ; SI: v_add_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], -v\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @fsub_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
diff --git a/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll b/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll
index cb5d6e732c701..7444a3f21a39a 100644
--- a/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/ftrunc.f64.ll
@@ -2,13 +2,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s
 
-declare double @llvm.trunc.f64(double) nounwind readnone
-declare <2 x double> @llvm.trunc.v2f64(<2 x double>) nounwind readnone
-declare <3 x double> @llvm.trunc.v3f64(<3 x double>) nounwind readnone
-declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone
-declare <8 x double> @llvm.trunc.v8f64(<8 x double>) nounwind readnone
-declare <16 x double> @llvm.trunc.v16f64(<16 x double>) nounwind readnone
-
 ; FUNC-LABEL: {{^}}v_ftrunc_f64:
 ; CI: v_trunc_f64
 ; SI: s_bfe_u32 {{s[0-9]+}}, {{s[0-9]+}}, 0xb0014
diff --git a/llvm/test/CodeGen/AMDGPU/ftrunc.ll b/llvm/test/CodeGen/AMDGPU/ftrunc.ll
index 69c3eb0908f59..11cc02e036fea 100644
--- a/llvm/test/CodeGen/AMDGPU/ftrunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/ftrunc.ll
@@ -2,13 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI --check-prefix=FUNC %s
 ; RUN: llc -mtriple=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG --check-prefix=FUNC %s
 
-declare float @llvm.trunc.f32(float) nounwind readnone
-declare <2 x float> @llvm.trunc.v2f32(<2 x float>) nounwind readnone
-declare <3 x float> @llvm.trunc.v3f32(<3 x float>) nounwind readnone
-declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone
-declare <8 x float> @llvm.trunc.v8f32(<8 x float>) nounwind readnone
-declare <16 x float> @llvm.trunc.v16f32(<16 x float>) nounwind readnone
-
 ; FUNC-LABEL: {{^}}ftrunc_f32:
 ; EG: TRUNC
 ; SI: v_trunc_f32_e32
diff --git a/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll b/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll
index 1853aa9303095..4e13f1754472c 100644
--- a/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll
+++ b/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll
@@ -332,5 +332,3 @@ bb:
   store i64 %i9, ptr addrspace(1) %arg, align 32
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll b/llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll
index 944dcda5eba6f..395d8d81e213c 100644
--- a/llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll
+++ b/llvm/test/CodeGen/AMDGPU/gep-const-address-space.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s
 
-declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0.f64(ptr nocapture, double) #8
-
 define protected amdgpu_kernel void @IllegalGEPConst(i32 %a, ptr addrspace(1) %b, double %c) {
 ; CHECK-LABEL: IllegalGEPConst:
 ; CHECK:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10-vop-literal.ll b/llvm/test/CodeGen/AMDGPU/gfx10-vop-literal.ll
index 08799e7e26bdf..c9eaaf1ed5bff 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10-vop-literal.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx10-vop-literal.ll
@@ -60,6 +60,3 @@ define amdgpu_kernel void @test_bfe_2lit_v(ptr addrspace(1) %p) {
   store i32 %bfe, ptr addrspace(1) %ptr, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
index 81239e841e097..f7f224e37cd3c 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx11-user-sgpr-init16-bug.ll
@@ -182,15 +182,6 @@ define amdgpu_kernel void @all_inputs() {
   ret void
 }
 
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
-declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-declare align 4 ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
-declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
-declare i64 @llvm.amdgcn.dispatch.id() #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
index c69207c0472e7..b2dd964cb5788 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx12_scalar_subword_loads.ll
@@ -759,8 +759,3 @@ main_body:
   store i32 %zext, ptr addrspace(1) %out
   ret void
 }
-
-declare i8 @llvm.amdgcn.s.buffer.load.i8(<4 x i32>, i32, i32)
-declare i8 @llvm.amdgcn.s.buffer.load.u8(<4 x i32>, i32, i32)
-declare i16 @llvm.amdgcn.s.buffer.load.i16(<4 x i32>, i32, i32)
-declare i16 @llvm.amdgcn.s.buffer.load.u16(<4 x i32>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/gfx90a-enc.ll b/llvm/test/CodeGen/AMDGPU/gfx90a-enc.ll
index 2f20ca8301bd9..4cb6bc365d773 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx90a-enc.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx90a-enc.ll
@@ -18,11 +18,6 @@ define amdgpu_kernel void @test(<4 x i32> %x) #0 {
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #1
-declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #1
-declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32 immarg, i32 immarg, i32 immarg) #2
-
 attributes #0 = { nounwind readnone speculatable willreturn "amdgpu-flat-work-group-size"="1,256" }
 attributes #1 = { nounwind memory(argmem: read) willreturn }
 attributes #2 = { convergent nounwind readnone willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
index ee0910b21f024..e84a370fe95b0 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-no-rtn.ll
@@ -328,6 +328,3 @@ define amdgpu_ps void @global_atomic_fadd_f32_saddr_no_rtn_atomicrmw(ptr addrspa
 }
 
 attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
-
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
index 3454e9d1019e5..ab49cbb6fa72d 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f32-rtn.ll
@@ -236,7 +236,4 @@ define amdgpu_ps float @global_atomic_fadd_f32_saddr_rtn_atomicrmw(ptr addrspace
   ret float %ret
 }
 
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-
 attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll
index 9d8b987d2ba68..8e8e33457db82 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.f64.ll
@@ -254,7 +254,4 @@ define amdgpu_ps double @global_atomic_fadd_f64_saddr_rtn_atomicrmw(ptr addrspac
   ret double %ret
 }
 
-declare double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1), double)
-declare double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1), double)
-
 attributes #0 = {"amdgpu-unsafe-fp-atomics"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll
index 666971618a5c2..d183817440e10 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-no-rtn.ll
@@ -114,6 +114,3 @@ define amdgpu_ps void @global_atomic_fadd_v2f16_saddr_no_rtn_flat_intrinsic(ptr
   %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data)
   ret void
 }
-
-declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)
-declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)
diff --git a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll
index ae81e97ca39e2..5d47591c670ba 100644
--- a/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-atomic-fadd.v2f16-rtn.ll
@@ -69,6 +69,3 @@ define amdgpu_ps <2 x half> @global_atomic_fadd_v2f16_saddr_rtn_flat_intrinsic(p
   %ret = call <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data)
   ret <2 x half> %ret
 }
-
-declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)
-declare <2 x half> @llvm.amdgcn.flat.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)
diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx1030.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx1030.ll
index 79de55eb63bf8..d902a170d7842 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx1030.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx1030.ll
@@ -55,6 +55,4 @@ define amdgpu_ps void @global_csub_saddr_i32_nortn_neg128(ptr addrspace(1) inreg
   ret void
 }
 
-declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #0
-
 attributes #0 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx908.ll b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx908.ll
index 0147084a6996f..98243628fc46a 100644
--- a/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx908.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-saddr-atomics.gfx908.ll
@@ -53,7 +53,4 @@ define amdgpu_ps void @global_fadd_saddr_v2f16_nortn_neg128(ptr addrspace(1) inr
   ret void
 }
 
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1(ptr addrspace(1) nocapture, float) #0
-declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1(ptr addrspace(1) nocapture, <2 x half>) #0
-
 attributes #0 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll b/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll
index b38758bae537d..1a0e2ba4652eb 100644
--- a/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-smrd-unknown.ll
@@ -14,7 +14,5 @@ bb:
   ret void
 }
 
-declare float @llvm.fmuladd.f32(float, float, float) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan.ll
index f954560d0f5ca..cb7cb44602819 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan.ll
@@ -111,11 +111,6 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
-
-declare align 4 ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
-
 attributes #0 = {"target-cpu"="gfx906"}
 attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none)}
 
diff --git a/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan_fp.ll b/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan_fp.ll
index fab24e10f810f..5e28d3471e3dd 100644
--- a/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan_fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_atomics_iterative_scan_fp.ll
@@ -1,7 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN:  opt -S -mtriple=amdgcn-- -amdgpu-atomic-optimizer-strategy=Iterative -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR-ITERATIVE %s
 ; RUN:  opt -S -mtriple=amdgcn-- -amdgpu-atomic-optimizer-strategy=DPP -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR-DPP %s
-declare i32 @llvm.amdgcn.workitem.id.x()
 define amdgpu_kernel void @global_atomic_fadd_uni_value(ptr addrspace(1) %ptr) #0 {
 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_value(
 ; IR-ITERATIVE-NEXT:    [[TMP1:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
diff --git a/llvm/test/CodeGen/AMDGPU/global_smrd.ll b/llvm/test/CodeGen/AMDGPU/global_smrd.ll
index 3b71e8ffefbf8..c96aff2742f3f 100644
--- a/llvm/test/CodeGen/AMDGPU/global_smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_smrd.ll
@@ -133,9 +133,6 @@ entry:
   ret void
 }
 
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #1 = { nounwind readnone }
 
 !8 = !{!9, !9, i64 0}
diff --git a/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll b/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll
index 492d3ba7b0fcb..6f80ffd4682c9 100644
--- a/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll
+++ b/llvm/test/CodeGen/AMDGPU/hip.extern.shared.array.ll
@@ -134,5 +134,3 @@ define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) {
   store double %val2, ptr addrspace(3) %arrayidx2, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/hoist-cond.ll b/llvm/test/CodeGen/AMDGPU/hoist-cond.ll
index 830a40ff052ac..f35bcfad75d54 100644
--- a/llvm/test/CodeGen/AMDGPU/hoist-cond.ll
+++ b/llvm/test/CodeGen/AMDGPU/hoist-cond.ll
@@ -41,6 +41,5 @@ bb4:                                             ; preds = %bb3
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll
index 6a49eac134a67..2b30c7b6d4ca6 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-heap-v5.ll
@@ -26,7 +26,6 @@ define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 {
 }
 
 ; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
 
 ; CHECK: amdhsa.kernels:
 ; CHECK:  - .args:
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll
index 6f4c8911efd33..da90afa1ab8ae 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll
@@ -26,7 +26,6 @@ define void @function5(ptr addrspace(4) %ptr, ptr %sink) #4 {
 }
 
 ; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
 
 ; CHECK: amdhsa.kernels:
 ; CHECK:  - .args:
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll
index 01f8fbfd76314..fb3ff9c3a8f48 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v5.ll
@@ -26,7 +26,6 @@ define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 {
 }
 
 ; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
 
 ; CHECK: amdhsa.kernels:
 ; CHECK:  - .args:
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll
index 689619227b8d7..415064981d73c 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-multigrid-sync-arg-v5.ll
@@ -26,7 +26,6 @@ define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 {
 }
 
 ; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
 
 ; CHECK: amdhsa.kernels:
 ; CHECK:  - .args:
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll
index 9854977c2f308..6e78c6aed7205 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-queue-ptr-v5.ll
@@ -67,15 +67,5 @@ define amdgpu_kernel void @amdgcn_queue_ptr_requires_queue_ptr(ptr addrspace(1)
   ret void
 }
 
-
-declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr()
-declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-declare i64 @llvm.amdgcn.dispatch.id()
-declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-declare i1 @llvm.amdgcn.is.shared(ptr)
-declare i1 @llvm.amdgcn.is.private(ptr)
-declare void @llvm.trap()
-declare void @llvm.debugtrap()
-
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll
index cf26a427aec32..55399f4ebe9f0 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-queueptr-v5.ll
@@ -26,7 +26,6 @@ define void @function5(ptr addrspace(4) %ptr, ptr %sink) #2 {
 }
 
 ; Function Attrs: nounwind readnone speculatable willreturn
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
 
 ; CHECK: amdhsa.kernels:
 ; CHECK:  - .args:
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
index f34f9f38feeb4..b9d3e76732e80 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
@@ -80,8 +80,5 @@ end:
   ret void
 }
 
-declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
-
 attributes #0 = { nounwind memory(argmem: read) }
 attributes #1 = { nounwind inaccessiblememonly }
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
index 80aa6ee0ab103..7601ef12e7744 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
@@ -34,8 +34,6 @@ bb6:                                              ; preds = %bb4, %bb3
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
 
 ; Make sure this won't crash.
diff --git a/llvm/test/CodeGen/AMDGPU/icmp.i16.ll b/llvm/test/CodeGen/AMDGPU/icmp.i16.ll
index c1a074a81b2aa..201f96b41eab9 100644
--- a/llvm/test/CodeGen/AMDGPU/icmp.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/icmp.i16.ll
@@ -347,7 +347,5 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
index 297b5180dfe9b..25d964f3959b8 100644
--- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll
@@ -452,13 +452,6 @@ bb50:                                             ; preds = %bb50, %bb48
   br label %bb50
 }
 
-declare i64 @llvm.amdgcn.s.getpc() #1
-declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
-declare float @llvm.amdgcn.image.sample.lz.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
-declare <2 x float> @llvm.amdgcn.image.sample.lz.2d.v2f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
-declare void @llvm.amdgcn.raw.buffer.store.v3i32(<3 x i32>, <4 x i32>, i32, i32, i32 immarg) #3
-declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32 immarg) #3
-
 attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
 attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 attributes #2 = { nocallback nofree nosync nounwind willreturn memory(read) }
diff --git a/llvm/test/CodeGen/AMDGPU/idot2.ll b/llvm/test/CodeGen/AMDGPU/idot2.ll
index 9da07ea04ded5..900d165952ba7 100644
--- a/llvm/test/CodeGen/AMDGPU/idot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/idot2.ll
@@ -2885,5 +2885,3 @@ entry:
   store i32 %add6, ptr addrspace(1) %dst, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/idot4s.ll b/llvm/test/CodeGen/AMDGPU/idot4s.ll
index fdd913867c8f8..2c3cbe2604011 100644
--- a/llvm/test/CodeGen/AMDGPU/idot4s.ll
+++ b/llvm/test/CodeGen/AMDGPU/idot4s.ll
@@ -3431,6 +3431,3 @@ entry:
   store i32 %res, ptr addrspace(1) %dst, align 4
   ret void
 }
-
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/idot4u.ll b/llvm/test/CodeGen/AMDGPU/idot4u.ll
index 9a1de74034cd8..dd9c88607a27b 100644
--- a/llvm/test/CodeGen/AMDGPU/idot4u.ll
+++ b/llvm/test/CodeGen/AMDGPU/idot4u.ll
@@ -6136,5 +6136,3 @@ entry:
   store i32 %add2, ptr addrspace(1) %dst, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/idot8s.ll b/llvm/test/CodeGen/AMDGPU/idot8s.ll
index 8c53d2671de3f..7f272b927fc15 100644
--- a/llvm/test/CodeGen/AMDGPU/idot8s.ll
+++ b/llvm/test/CodeGen/AMDGPU/idot8s.ll
@@ -3120,5 +3120,3 @@ entry:
   store i8 %add8, ptr addrspace(1) %dst, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/idot8u.ll b/llvm/test/CodeGen/AMDGPU/idot8u.ll
index 3828fa557731e..480eb0fd684fa 100644
--- a/llvm/test/CodeGen/AMDGPU/idot8u.ll
+++ b/llvm/test/CodeGen/AMDGPU/idot8u.ll
@@ -3338,5 +3338,3 @@ entry:
   store i32 %add8, ptr addrspace(1) %dst, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/image-attributes.ll b/llvm/test/CodeGen/AMDGPU/image-attributes.ll
index 29d0f0b08c4d7..c7460c29f0969 100644
--- a/llvm/test/CodeGen/AMDGPU/image-attributes.ll
+++ b/llvm/test/CodeGen/AMDGPU/image-attributes.ll
@@ -161,11 +161,6 @@ entry:
 %opencl.image2d_t = type opaque
 %opencl.image3d_t = type opaque
 
-declare [3 x i32] @llvm.OpenCL.image.get.size.2d(ptr addrspace(1)) #0
-declare [3 x i32] @llvm.OpenCL.image.get.size.3d(ptr addrspace(1)) #0
-declare [2 x i32] @llvm.OpenCL.image.get.format.2d(ptr addrspace(1)) #0
-declare [2 x i32] @llvm.OpenCL.image.get.format.3d(ptr addrspace(1)) #0
-
 attributes #0 = { readnone }
 
 !opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9}
diff --git a/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll b/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll
index 83b650e2d755e..d0ae42fa69a44 100644
--- a/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/image-load-d16-tfe.ll
@@ -661,9 +661,4 @@ define amdgpu_ps void @load_1d_v4f16_tfe_dmask15(<8 x i32> inreg %rsrc, i32 %s)
   ret void
 }
 
-declare { half, i32 } @llvm.amdgcn.image.load.1d.sl_f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <2 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v2f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <3 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v3f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare { <4 x half>, i32 } @llvm.amdgcn.image.load.1d.sl_v4f16i32s.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/image-resource-id.ll b/llvm/test/CodeGen/AMDGPU/image-resource-id.ll
index 08a9dacfc0f5a..32cd409145900 100644
--- a/llvm/test/CodeGen/AMDGPU/image-resource-id.ll
+++ b/llvm/test/CodeGen/AMDGPU/image-resource-id.ll
@@ -324,9 +324,6 @@ entry:
 %opencl.image2d_t = type opaque
 %opencl.image3d_t = type opaque
 
-declare i32 @llvm.OpenCL.image.get.resource.id.2d(ptr addrspace(1)) #0
-declare i32 @llvm.OpenCL.image.get.resource.id.3d(ptr addrspace(1)) #0
-
 attributes #0 = { readnone }
 
 !opencl.kernels = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13,
diff --git a/llvm/test/CodeGen/AMDGPU/image-sample-waterfall.ll b/llvm/test/CodeGen/AMDGPU/image-sample-waterfall.ll
index 5cb9721ff7457..d9d6f1f2411c4 100644
--- a/llvm/test/CodeGen/AMDGPU/image-sample-waterfall.ll
+++ b/llvm/test/CodeGen/AMDGPU/image-sample-waterfall.ll
@@ -1,8 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
 
-
-declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
-
 ; GCN-LABEL: {{^}}water_loop_rsrc:
 
 ; GCN: [[RSRC_LOOP:.L[a-zA-Z0-9_]+]]:                                  ; =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/AMDGPU/image-schedule.ll b/llvm/test/CodeGen/AMDGPU/image-schedule.ll
index dbd9efc58e59d..7523ff47f638b 100644
--- a/llvm/test/CodeGen/AMDGPU/image-schedule.ll
+++ b/llvm/test/CodeGen/AMDGPU/image-schedule.ll
@@ -36,13 +36,10 @@ define dllexport amdgpu_cs void @_amdgpu_cs_main(i32 inreg %arg, i32 inreg %arg1
 }
 
 ; Function Attrs: nounwind readnone speculatable
-declare i64 @llvm.amdgcn.s.getpc() #1
 
 ; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #2
 
 ; Function Attrs: nounwind writeonly
-declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #3
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll b/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll
index 9927f4d166bad..474bab9c999df 100644
--- a/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll
+++ b/llvm/test/CodeGen/AMDGPU/img-nouse-adjust.ll
@@ -16,7 +16,6 @@ define amdgpu_cs void @_amdgpu_cs_main(i32 %dummy) local_unnamed_addr #0 {
 }
 
 ; Function Attrs: nounwind readonly
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
   
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly  }
diff --git a/llvm/test/CodeGen/AMDGPU/imm.ll b/llvm/test/CodeGen/AMDGPU/imm.ll
index f7a0e296fa173..b2be128729384 100644
--- a/llvm/test/CodeGen/AMDGPU/imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/imm.ll
@@ -1846,6 +1846,4 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll b/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll
index e37b6ff10ffa9..31258cabae532 100644
--- a/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicit-arg-v5-opt.ll
@@ -189,12 +189,6 @@ define amdgpu_kernel void @get_work_group_size_z_reqd(ptr addrspace(1) %out) #0
   ret void
 }
 
-
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
-declare i32 @llvm.amdgcn.workgroup.id.y() #1
-declare i32 @llvm.amdgcn.workgroup.id.z() #1
-
 !llvm.module.flags = !{!1}
 
 attributes #0 = { nounwind "uniform-work-group-size"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
index 72f10ea892e53..3e8c2591614d2 100644
--- a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
@@ -352,14 +352,5 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr)  {
   ret void
 }
 
-declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr()
-declare noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-declare i64 @llvm.amdgcn.dispatch.id()
-declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-declare i1 @llvm.amdgcn.is.shared(ptr)
-declare i1 @llvm.amdgcn.is.private(ptr)
-declare void @llvm.trap()
-declare void @llvm.debugtrap()
-
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll b/llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll
index 6b2080305e730..fdde728650bbd 100644
--- a/llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicit-kernel-argument-alignment.ll
@@ -55,7 +55,5 @@ define amdgpu_kernel void @test_aligned_to_eight(i64 %eight)  {
 ; CHECK-NEXT:         .kernarg_segment_size: 264
 ; CHECK-LABEL:        .name:           test_aligned_to_eight
 
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/implicitarg-attributes.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-attributes.ll
index 4c5c136f5333f..06ea18443493a 100644
--- a/llvm/test/CodeGen/AMDGPU/implicitarg-attributes.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicitarg-attributes.ll
@@ -89,9 +89,3 @@ join:                                             ; preds = %new, %old
   %tmp16 = atomicrmw add ptr addrspace(1) %add.ptr3, i32 15 syncscope("agent-one-as") monotonic, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
-
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-
-declare i32 @llvm.amdgcn.workgroup.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
index a5792bf29ddca..f49d8fd449922 100644
--- a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
@@ -3,27 +3,10 @@
 ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V5 %s
 ; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V6 %s
 
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
-
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
-
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
-declare i32 @llvm.amdgcn.lds.kernel.id() #0
-declare i64 @llvm.amdgcn.dispatch.id() #0
-
-
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
-
 ; Avoid adding all of these to the output attribute sets
 define void @use_everything_else() {
 ; CHECK-LABEL: define {{[^@]+}}@use_everything_else
-; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[VAL0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    [[VAL1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
 ; CHECK-NEXT:    [[VAL2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
@@ -71,7 +54,7 @@ define void @use_everything_else() {
 
 define amdgpu_kernel void @test_default_queue_offset_v4_0(ptr addrspace(1) %kernarg) {
 ; CHECK-LABEL: define {{[^@]+}}@test_default_queue_offset_v4_0
-; CHECK-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:    call void @use_everything_else()
 ; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32
@@ -89,7 +72,7 @@ define amdgpu_kernel void @test_default_queue_offset_v4_0(ptr addrspace(1) %kern
 
 define amdgpu_kernel void @test_default_queue_offset_v5_0(ptr addrspace(1) %kernarg) {
 ; CHECK-LABEL: define {{[^@]+}}@test_default_queue_offset_v5_0
-; CHECK-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:    call void @use_everything_else()
 ; CHECK-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104
@@ -107,7 +90,7 @@ define amdgpu_kernel void @test_default_queue_offset_v5_0(ptr addrspace(1) %kern
 
 define amdgpu_kernel void @test_completion_action_offset_v4_0(ptr addrspace(1) %kernarg) {
 ; V4-LABEL: define {{[^@]+}}@test_completion_action_offset_v4_0
-; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] {
+; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3:[0-9]+]] {
 ; V4-NEXT:    call void @use_everything_else()
 ; V4-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; V4-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 40
@@ -116,7 +99,7 @@ define amdgpu_kernel void @test_completion_action_offset_v4_0(ptr addrspace(1) %
 ; V4-NEXT:    ret void
 ;
 ; V5-LABEL: define {{[^@]+}}@test_completion_action_offset_v4_0
-; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] {
+; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] {
 ; V5-NEXT:    call void @use_everything_else()
 ; V5-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; V5-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 40
@@ -125,7 +108,7 @@ define amdgpu_kernel void @test_completion_action_offset_v4_0(ptr addrspace(1) %
 ; V5-NEXT:    ret void
 ;
 ; V6-LABEL: define {{[^@]+}}@test_completion_action_offset_v4_0
-; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] {
+; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] {
 ; V6-NEXT:    call void @use_everything_else()
 ; V6-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; V6-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 40
@@ -143,7 +126,7 @@ define amdgpu_kernel void @test_completion_action_offset_v4_0(ptr addrspace(1) %
 
 define amdgpu_kernel void @test_completion_action_offset_v5_0(ptr addrspace(1) %kernarg) {
 ; V4-LABEL: define {{[^@]+}}@test_completion_action_offset_v5_0
-; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3]] {
+; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] {
 ; V4-NEXT:    call void @use_everything_else()
 ; V4-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; V4-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112
@@ -152,7 +135,7 @@ define amdgpu_kernel void @test_completion_action_offset_v5_0(ptr addrspace(1) %
 ; V4-NEXT:    ret void
 ;
 ; V5-LABEL: define {{[^@]+}}@test_completion_action_offset_v5_0
-; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] {
+; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3:[0-9]+]] {
 ; V5-NEXT:    call void @use_everything_else()
 ; V5-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; V5-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112
@@ -161,7 +144,7 @@ define amdgpu_kernel void @test_completion_action_offset_v5_0(ptr addrspace(1) %
 ; V5-NEXT:    ret void
 ;
 ; V6-LABEL: define {{[^@]+}}@test_completion_action_offset_v5_0
-; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] {
+; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3:[0-9]+]] {
 ; V6-NEXT:    call void @use_everything_else()
 ; V6-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; V6-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112
@@ -179,7 +162,7 @@ define amdgpu_kernel void @test_completion_action_offset_v5_0(ptr addrspace(1) %
 
 define amdgpu_kernel void @test_default_queue_completion_action_offset_v3_0(ptr addrspace(1) %kernarg) {
 ; V4-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v3_0
-; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR5:[0-9]+]] {
+; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] {
 ; V4-NEXT:    call void @use_everything_else()
 ; V4-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; V4-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32
@@ -188,7 +171,7 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v3_0(ptr
 ; V4-NEXT:    ret void
 ;
 ; V5-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v3_0
-; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] {
+; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] {
 ; V5-NEXT:    call void @use_everything_else()
 ; V5-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; V5-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32
@@ -197,7 +180,7 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v3_0(ptr
 ; V5-NEXT:    ret void
 ;
 ; V6-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v3_0
-; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] {
+; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR1]] {
 ; V6-NEXT:    call void @use_everything_else()
 ; V6-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; V6-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32
@@ -215,7 +198,7 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v3_0(ptr
 
 define amdgpu_kernel void @test_default_queue_completion_action_offset_v5_0(ptr addrspace(1) %kernarg) {
 ; V4-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v5_0
-; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR3]] {
+; V4-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] {
 ; V4-NEXT:    call void @use_everything_else()
 ; V4-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; V4-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104
@@ -224,7 +207,7 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v5_0(ptr
 ; V4-NEXT:    ret void
 ;
 ; V5-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v5_0
-; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR5:[0-9]+]] {
+; V5-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] {
 ; V5-NEXT:    call void @use_everything_else()
 ; V5-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; V5-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104
@@ -233,7 +216,7 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v5_0(ptr
 ; V5-NEXT:    ret void
 ;
 ; V6-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v5_0
-; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR5:[0-9]+]] {
+; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] {
 ; V6-NEXT:    call void @use_everything_else()
 ; V6-NEXT:    [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
 ; V6-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104
@@ -257,26 +240,26 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo
 
 
 ;.
-; V4: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; V4: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR4]] = { "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR5]] = { "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR3]] = { "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR4]] = { "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;.
-; V5: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; V5: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; V5: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V5: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V5: attributes #[[ATTR4]] = { "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V5: attributes #[[ATTR5]] = { "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR3]] = { "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR4]] = { "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;.
-; V6: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; V6: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
-; V6: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V6: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V6: attributes #[[ATTR4]] = { "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V6: attributes #[[ATTR5]] = { "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR0]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR3]] = { "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR4]] = { "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;.
 ; V4: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400}
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll
index 31fa32b3475cb..b6d96abbf2b89 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-gfx9.ll
@@ -83,7 +83,4 @@ define amdgpu_kernel void @insertelement_with_call(ptr addrspace(1) %ptr, i32 %i
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare void @llvm.amdgcn.s.barrier() #2
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-pregfx9.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-pregfx9.ll
index cbb5d9e169284..b94f599e1c04f 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-pregfx9.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si-pregfx9.ll
@@ -45,9 +45,6 @@ bb2:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare void @llvm.amdgcn.s.barrier() #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index 1f92427fe8a23..a897cda11b1d6 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -543,9 +543,6 @@ bb8:                                              ; preds = %bb2
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare void @llvm.amdgcn.s.barrier() #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
index 3cabe41afb05a..f84bbf85a0f45 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll
@@ -4,8 +4,6 @@
 ; Verify that we consider the xor at the end of the waterfall loop emitted for
 ; divergent indirect addressing as a terminator.
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 ; There should be no spill code inserted between the xor and the real terminator
 define amdgpu_kernel void @extract_w_offset_vgpr(ptr addrspace(1) %out) {
   ; GCN-LABEL: name: extract_w_offset_vgpr
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll b/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
index 40cb061f61ab4..db4aba0f94440 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-private-64.ll
@@ -4,8 +4,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=-promote-alloca,+max-private-element-size-16 -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -mattr=+promote-alloca -disable-promote-alloca-to-vector -verify-machineinstrs < %s | FileCheck --check-prefixes=CI-PROMOTE,SI %s
 
-declare void @llvm.amdgcn.s.barrier() #0
-
 ; SI-LABEL: {{^}}private_access_f64_alloca:
 
 ; SI-ALLOCA16: buffer_store_dwordx2
diff --git a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll
index 8183106b0ce9d..a9f3d7e23cf6f 100644
--- a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll
@@ -211,5 +211,3 @@ inner_loop:                                     ; preds = %LeafBlock, %LeafBlock
 return:
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/input-mods.r600.ll b/llvm/test/CodeGen/AMDGPU/input-mods.r600.ll
index 4db5e2a472ee7..9de36d7245552 100644
--- a/llvm/test/CodeGen/AMDGPU/input-mods.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/input-mods.r600.ll
@@ -47,7 +47,3 @@ define amdgpu_ps void @test(<4 x float> inreg %reg0) {
    call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
    ret void
 }
-
-declare float @llvm.exp2.f32(float) readnone
-declare float @llvm.fabs.f32(float) readnone
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
index cddfb21a6fbdf..bbde4b95bf7b0 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GFX11
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 define void @f0() {
 ; GFX11-LABEL: f0:
 ; GFX11:       ; %bb.0: ; %bb
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir
index a142bddbae69a..707e519f0559d 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-exp.mir
@@ -10,8 +10,6 @@
     ret <4 x float> <float 5.000000e-01, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>
   }
 
-  declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
   attributes #0 = { nounwind }
 
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
index 68427e8937bb9..a3e91df393c09 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -2348,7 +2348,5 @@ define amdgpu_kernel void @dynamic_insertelement_v8f64(ptr addrspace(1) %out, <8
   ret void
 }
 
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
index 1ba2491d2210e..096596727c6f1 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
@@ -3185,8 +3185,5 @@ define amdgpu_kernel void @v_insertelement_v16f16_dynamic(ptr addrspace(1) %out,
   ret void
 }
 
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-nosaddr.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-nosaddr.ll
index 80ed8318d8abe..45e991f3bf791 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-nosaddr.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-nosaddr.ll
@@ -31,8 +31,5 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(ptr addrspace(1) %
   ret void
 }
 
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll
index c63fe3d6d2a37..649e6f2e4df35 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.subtest-saddr.ll
@@ -29,8 +29,5 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(ptr addrspace(1) %
   ret void
 }
 
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
index 526ee5a51745d..a0f772cb42350 100644
--- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
@@ -8216,9 +8216,6 @@ define i64 @mul_u24_zext_add64(i32 %x, i32 %y, i64 %z) {
   ret i64 %add
 }
 
-declare i64 @llvm.amdgcn.mul.u24.i64(i32, i32)
-declare i32 @llvm.amdgcn.mul.u24(i32, i32)
-
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX6: {{.*}}
 ; GFX7: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/internalize.ll b/llvm/test/CodeGen/AMDGPU/internalize.ll
index 6b2a4d5fc328b..c335bcc7e641b 100644
--- a/llvm/test/CodeGen/AMDGPU/internalize.ll
+++ b/llvm/test/CodeGen/AMDGPU/internalize.ll
@@ -54,8 +54,6 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { noinline nounwind }
 attributes #2 = { alwaysinline nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/invariant-image-load.ll b/llvm/test/CodeGen/AMDGPU/invariant-image-load.ll
index 693ebc8ad11ce..159def8699c7b 100644
--- a/llvm/test/CodeGen/AMDGPU/invariant-image-load.ll
+++ b/llvm/test/CodeGen/AMDGPU/invariant-image-load.ll
@@ -23,7 +23,4 @@ define amdgpu_ps void @test(<8 x i32> inreg %load, <8 x i32> inreg %store) {
   ret void
 }
 
-declare float @llvm.amdgcn.image.load.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg)
-
 !0 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll
index 29a96c227f2f0..e0ca27f8e6f33 100644
--- a/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll
+++ b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll
@@ -14,16 +14,12 @@
 %struct.MicrofacetExtra = type { <3 x float>, <3 x float>, <3 x float>, float, [12 x i8] }
 
 ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.fmuladd.f32(float, float, float) #0
 
 ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
-declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) #0
 
 ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0
 
 ; Function Attrs: argmemonly nofree nosync nounwind willreturn
-declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture) #1
 
 ; Function Attrs: norecurse
 define internal fastcc void @svm_node_closure_bsdf(ptr addrspace(1) %sd, ptr %stack, <4 x i32> %node, ptr %offset, i32 %0, i8 %trunc, float %1, float %2, float %mul80, i1 %cmp412.old, <4 x i32> %3, float %4, i32 %5, i1 %cmp440, i1 %cmp442, i1 %or.cond1306, float %.op, ptr addrspace(1) %arrayidx.i.i2202, ptr addrspace(1) %retval.0.i.i22089, ptr addrspace(1) %retval.1.i221310, i1 %cmp575, ptr addrspace(1) %num_closure_left.i2215, i32 %6, i1 %cmp.i2216, i32 %7, i64 %idx.ext.i2223, i32 %sub5.i2221) #2 {
@@ -199,13 +195,10 @@ kernel_set_buffer_pointers.exit:
 }
 
 ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.fabs.f32(float) #0
 
 ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.maxnum.f32(float, float) #0
 
 ; Function Attrs: nounwind readnone speculatable willreturn
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #3
 
 attributes #0 = { nofree nosync nounwind readnone speculatable willreturn }
 attributes #1 = { argmemonly nofree nosync nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll
index 1fd0c67737a27..8af5ec4711f18 100644
--- a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll
@@ -6,7 +6,6 @@
 
 declare amdgpu_cs_chain void @callee(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 })
 declare amdgpu_cs_chain_preserve void @callee_preserve(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 })
-declare void @llvm.amdgcn.cs.chain(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) noreturn
 
 define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) {
   ; GISEL-GFX11-LABEL: name: chain_to_chain
diff --git a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll
index fa447f981bf3a..8e647a9994c11 100644
--- a/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll
@@ -6,7 +6,6 @@
 
 declare amdgpu_cs_chain void @callee(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 })
 declare amdgpu_cs_chain_preserve void @callee_preserve(<3 x i32> inreg, { i32, ptr addrspace(5), i32, i32 })
-declare void @llvm.amdgcn.cs.chain(ptr, i64, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) noreturn
 
 define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr) {
   ; GISEL-GFX11-LABEL: name: chain_to_chain
diff --git a/llvm/test/CodeGen/AMDGPU/jump-address.ll b/llvm/test/CodeGen/AMDGPU/jump-address.ll
index 0747133c0c581..2793fd503e0a3 100644
--- a/llvm/test/CodeGen/AMDGPU/jump-address.ll
+++ b/llvm/test/CodeGen/AMDGPU/jump-address.ll
@@ -46,5 +46,3 @@ IF13:                                             ; preds = %ELSE
   %23 = fadd float 0x3FF8000000000000, %22
   br label %ENDIF
 }
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/kcache-fold.ll b/llvm/test/CodeGen/AMDGPU/kcache-fold.ll
index c3b41af529ecd..06a0f26632b85 100644
--- a/llvm/test/CodeGen/AMDGPU/kcache-fold.ll
+++ b/llvm/test/CodeGen/AMDGPU/kcache-fold.ll
@@ -104,9 +104,5 @@ main_body:
   ret void
 }
 
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) #0
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
index 2370ceff89bd5..06e70ec7ef268 100644
--- a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
@@ -1,8 +1,6 @@
 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=DOORBELL %s
 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=DOORBELL %s
 
-declare void @llvm.trap() #0
-
 ; DOORBELL:      .amdhsa_kernel trap
 ; DOORBELL-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; DOORBELL-NEXT:     .amdhsa_private_segment_fixed_size 0
diff --git a/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
index 3e0ad65c49821..b93910d16bdf5 100644
--- a/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
@@ -165,8 +165,4 @@ end:
   ret float 0.
 }
 
-declare void @llvm.amdgcn.kill(i1) #0
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
-declare void @llvm.amdgcn.exp.compr.v2i16(i32 immarg, i32 immarg, <2 x i16>, <2 x i16>, i1 immarg, i1 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/known-never-nan.ll b/llvm/test/CodeGen/AMDGPU/known-never-nan.ll
index 34aecd77c2506..5d00d98c9753e 100644
--- a/llvm/test/CodeGen/AMDGPU/known-never-nan.ll
+++ b/llvm/test/CodeGen/AMDGPU/known-never-nan.ll
@@ -41,10 +41,4 @@ define float @fma_not_fmaxnm_maybe_nan(i32 %i1, i32 %i2, i32 %i3) #0 {
   ret float %val
 }
 
-declare float @llvm.fma.f32(float, float, float) nounwind readnone
-declare float @llvm.amdgcn.fmad.ftz.f32(float, float, float)
-
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float)
-declare half @llvm.canonicalize.f16(half)
-
 attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" "target-features"="+mad-mac-f32-insts" }
diff --git a/llvm/test/CodeGen/AMDGPU/known-never-snan.ll b/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
index 64948c374e4dd..5331fde3cafc6 100644
--- a/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
+++ b/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
@@ -642,31 +642,6 @@ define float @v_test_known_not_snan_cubeid_input_fmed3_r_i_i_f32(float %a, float
   ret float %med
 }
 
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.sin.f32(float) #1
-declare float @llvm.cos.f32(float) #1
-declare float @llvm.exp2.f32(float) #1
-declare float @llvm.trunc.f32(float) #1
-declare float @llvm.floor.f32(float) #1
-declare float @llvm.ceil.f32(float) #1
-declare float @llvm.round.f32(float) #1
-declare float @llvm.rint.f32(float) #1
-declare float @llvm.nearbyint.f32(float) #1
-declare float @llvm.canonicalize.f32(float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare float @llvm.copysign.f32(float, float) #1
-declare float @llvm.fma.f32(float, float, float) #1
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare float @llvm.ldexp.f32.i32(float, i32) #1
-declare float @llvm.amdgcn.fmul.legacy(float, float) #1
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
-declare float @llvm.amdgcn.frexp.mant.f32(float) #1
-declare float @llvm.amdgcn.rcp.f32(float) #1
-declare float @llvm.amdgcn.rsq.f32(float) #1
-declare float @llvm.amdgcn.fract.f32(float) #1
-declare float @llvm.amdgcn.cubeid(float, float, float) #0
-
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone speculatable }
 
diff --git a/llvm/test/CodeGen/AMDGPU/knownbits-recursion.ll b/llvm/test/CodeGen/AMDGPU/knownbits-recursion.ll
index f10e7bdbfc84a..fb541842be5ad 100644
--- a/llvm/test/CodeGen/AMDGPU/knownbits-recursion.ll
+++ b/llvm/test/CodeGen/AMDGPU/knownbits-recursion.ll
@@ -65,5 +65,3 @@ bb4:                                              ; preds = %bb4, %bb
   %tmp88 = icmp eq i32 %tmp87, 1000
   br i1 %tmp88, label %bb1, label %bb4
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll b/llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll
index b991218ab812c..5efeba5f06a78 100644
--- a/llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll
+++ b/llvm/test/CodeGen/AMDGPU/lcssa-optnone.ll
@@ -22,5 +22,3 @@ for.inc:
 for.end:
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/lds-alignment.ll b/llvm/test/CodeGen/AMDGPU/lds-alignment.ll
index 8c23ace9b014b..1cb01a6b9360d 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-alignment.ll
@@ -9,9 +9,6 @@
 @lds.missing.align.0 = internal unnamed_addr addrspace(3) global [39 x i32] undef
 @lds.missing.align.1 = internal unnamed_addr addrspace(3) global [7 x i64] undef
 
-declare void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) nocapture, ptr addrspace(1) nocapture readonly, i32, i1) #0
-declare void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #0
-
 
 ; HSA-LABEL: {{^}}test_no_round_size_1:
 ; HSA: .amdhsa_group_segment_fixed_size 38
diff --git a/llvm/test/CodeGen/AMDGPU/lds-atomic-fadd.ll b/llvm/test/CodeGen/AMDGPU/lds-atomic-fadd.ll
index 74aa755bfce3d..546e6c240b74e 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-atomic-fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-atomic-fadd.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 
-declare float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) nocapture, float, i32, i32, i1)
-
 ; GCN-LABEL: {{^}}lds_ds_fadd:
 ; VI-DAG: s_mov_b32 m0
 ; GFX9-NOT: m0
diff --git a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll
index 142a6ed19daf8..d8142fa2ce578 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-atomic-fmin-fmax.ll
@@ -13,11 +13,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=G_GFX10 %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=G_GFX11 %s
 
-declare float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) nocapture, float, i32, i32, i1)
-declare float @llvm.amdgcn.ds.fmax.f32(ptr addrspace(3) nocapture, float, i32, i32, i1)
-declare double @llvm.amdgcn.ds.fmin.f64(ptr addrspace(3) nocapture, double, i32, i32, i1)
-declare double @llvm.amdgcn.ds.fmax.f64(ptr addrspace(3) nocapture, double, i32, i32, i1)
-
 
 define amdgpu_kernel void @lds_ds_fmin(ptr addrspace(5) %out, ptr addrspace(3) %ptrf, i32 %idx) {
 ; SI-LABEL: lds_ds_fmin:
diff --git a/llvm/test/CodeGen/AMDGPU/lds-dma-waits.ll b/llvm/test/CodeGen/AMDGPU/lds-dma-waits.ll
index 5cb3ca0b80b66..96b27da55c69d 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-dma-waits.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-dma-waits.ll
@@ -12,9 +12,6 @@
 @lds.8 = internal addrspace(3) global [64 x float] poison, align 16
 @lds.9 = internal addrspace(3) global [64 x float] poison, align 16
 
-declare void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)
-declare void @llvm.amdgcn.global.load.lds(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux)
-
 ; GCN-LABEL: {{^}}buffer_load_lds_dword_2_arrays:
 ; GCN-COUNT-4: buffer_load_dword
 ; GCN: s_waitcnt vmcnt(2)
@@ -150,5 +147,3 @@ main_body:
   store float %val.9, ptr addrspace(1) %out.gep.9
   ret void
 }
-
-declare void @llvm.amdgcn.wave.barrier()
diff --git a/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll b/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll
index 01af334652382..e5ae6ebb4e8f8 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-misaligned-bug.ll
@@ -253,5 +253,3 @@ bb:
   store <4 x i32> %v8, ptr %gep, align 8
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/lds-output-queue.ll b/llvm/test/CodeGen/AMDGPU/lds-output-queue.ll
index 0d06eb59e7bfc..dc86f56fde5f1 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-output-queue.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-output-queue.ll
@@ -23,8 +23,6 @@ entry:
   ret void
 }
 
-declare void @llvm.r600.group.barrier() nounwind convergent
-
 ; The machine scheduler does not do proper alias analysis and assumes that
 ; loads from global values (Note that a global value is different that a
 ; value from global memory.  A global value is a value that is declared
diff --git a/llvm/test/CodeGen/AMDGPU/lds-relocs.ll b/llvm/test/CodeGen/AMDGPU/lds-relocs.ll
index 60dbd1c082a24..b32dada934f5b 100644
--- a/llvm/test/CodeGen/AMDGPU/lds-relocs.ll
+++ b/llvm/test/CodeGen/AMDGPU/lds-relocs.ll
@@ -55,7 +55,6 @@ main_body:
 }
 
 ; Function Attrs: convergent nounwind readnone
-declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32) #4
 
 attributes #0 = { "no-signed-zeros-fp-math"="true" }
 attributes #4 = { convergent nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.f16.ll
index d23dee1f02f09..c268b1986105a 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.f16.ll
@@ -275,9 +275,4 @@ define <4 x half> @raw_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffset__sgpr_s
   ret <4 x half> %val
 }
 
-declare half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32>, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.buffer.load.format.v2f16(<4 x i32>, i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.buffer.load.format.v4f16(<4 x i32>, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.ll
index bdcb77201714a..1781853087208 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.format.ll
@@ -344,9 +344,4 @@ define <4 x float> @raw_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voffset__sgpr_
   ret <4 x float> %val
 }
 
-declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.ll
index 28059db0bede3..04e4ceaa3a9f7 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.load.ll
@@ -1910,16 +1910,3 @@ define float @raw_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffset
   %val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret float %val
 }
-
-declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 immarg)
-declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32 immarg)
-declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32 immarg)
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32 immarg)
-
-declare half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32>, i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32>, i32, i32, i32 immarg)
-declare <3 x half> @llvm.amdgcn.raw.buffer.load.v3f16(<4 x i32>, i32, i32, i32 immarg)
-declare <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32>, i32, i32, i32 immarg)
-
-declare i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32>, i32, i32, i32 immarg)
-declare i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32>, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.format.f16.ll
index ed5fa05fa8ed3..78a7260ea04e5 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.format.f16.ll
@@ -600,7 +600,3 @@ define void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_so
   call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.buffer.store.format.f16(half, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.format.f32.ll
index e38de72e1f0f1..0840e0cfadc4e 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.format.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.format.f32.ll
@@ -701,8 +701,3 @@ define void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_so
   call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.ll
index 39c58f8f39d59..50cc796c796ed 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.buffer.store.ll
@@ -1794,15 +1794,3 @@ define void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr_soffset_o
   call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 5000, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.buffer.store.i8(i8, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.i16(i16, <4 x i32>, i32, i32, i32 immarg)
-
-declare void @llvm.amdgcn.raw.buffer.store.f16(half, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg)
-
-declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.f16.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.f16.ll
index 5b8bd1f60233b..b841026789567 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.f16.ll
@@ -325,9 +325,4 @@ define <4 x half> @raw_ptr_buffer_load_format_v4f16__sgpr_rsrc__vgpr_voffset__sg
   ret <4 x half> %val
 }
 
-declare half @llvm.amdgcn.raw.ptr.buffer.load.format.f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.ll
index 7dabd9a395746..43c107afbe156 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.format.ll
@@ -404,9 +404,4 @@ define <4 x float> @raw_ptr_buffer_load_format_v4f32__sgpr_rsrc__vgpr_voffset__s
   ret <4 x float> %val
 }
 
-declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.ll
index 1a9f7b1619f4c..1b385c714584d 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.load.ll
@@ -2260,16 +2260,3 @@ define float @raw_ptr_buffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_vof
   %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
   ret float %val
 }
-
-declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg)
-declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32 immarg)
-declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32 immarg)
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32 immarg)
-
-declare half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32 immarg)
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32 immarg)
-declare <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.v3f16(ptr addrspace(8), i32, i32, i32 immarg)
-declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32 immarg)
-
-declare i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32 immarg)
-declare i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.format.f16.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.format.f16.ll
index eada2004161d1..2b43200588c6e 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.format.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.format.f16.ll
@@ -710,7 +710,3 @@ define void @raw_ptr_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgp
   call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f16(<4 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.f16(half, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.format.f32.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.format.f32.ll
index 60db62dc43a61..0a40946d83be9 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.format.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.format.f32.ll
@@ -821,8 +821,3 @@ define void @raw_ptr_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffset__sgp
   call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float> %val, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v3f32(<3 x float>, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.ll
index 78e29387b1d40..4c855686cf757 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.buffer.store.ll
@@ -2124,15 +2124,3 @@ define void @raw_ptr_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr_soffs
   call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 5000, i32 %soffset, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16, ptr addrspace(8), i32, i32, i32 immarg)
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f16(half, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32 immarg)
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v3f32(<3 x float>, ptr addrspace(8), i32, i32, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.f16.ll
index 24dc4f1b3c0aa..1b1472184772a 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.f16.ll
@@ -508,9 +508,4 @@ define half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(ptr
   ret half %val
 }
 
-declare half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.ll
index 01dc0328f2d2d..ba678ff409716 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.load.ll
@@ -583,9 +583,4 @@ define float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(pt
   ret float %val
 }
 
-declare float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.store.f16.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.store.f16.ll
index cd60d5b21faba..67f224b8444a0 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.store.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.store.f16.ll
@@ -638,8 +638,3 @@ define void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(ha
   call void @llvm.amdgcn.raw.ptr.tbuffer.store.f16(half %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.f16(half, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f16(<3 x half>, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.store.ll
index 5c67d82c1e977..b1a1bfd95fd12 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.ptr.tbuffer.store.ll
@@ -1620,8 +1620,3 @@ define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffse
   call void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f32(<3 x float>, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.f16.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.f16.ll
index bcffca8a3c4fc..38a64621fbd83 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.f16.ll
@@ -428,9 +428,4 @@ define half @raw_tbuffer_load_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(<4
   ret half %val
 }
 
-declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.ll
index 51e56a47fc2f2..2c02fbef9c20a 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.load.ll
@@ -493,9 +493,4 @@ define float @raw_tbuffer_load_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(<4
   ret float %val
 }
 
-declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.store.f16.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.store.f16.ll
index a1d8acdb4cc53..deb7145afb16c 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.store.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.store.f16.ll
@@ -538,8 +538,3 @@ define void @raw_tbuffer_store_f16__sgpr_rsrc__vgpr_voffset__sgpr_soffset_dlc(ha
   call void @llvm.amdgcn.raw.tbuffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 78, i32 4)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.store.ll
index 7c0aa26a8a699..b03a75b4f70b8 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.raw.tbuffer.store.ll
@@ -1370,8 +1370,3 @@ define void @raw_tbuffer_store_f32__sgpr_rsrc__vgpr_voffset__sgpr_soffset_voffse
   call void @llvm.amdgcn.raw.tbuffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 94, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.tbuffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-soffset-mbuf.ll b/llvm/test/CodeGen/AMDGPU/legalize-soffset-mbuf.ll
index bfd97c53522c9..b31f403e91c33 100644
--- a/llvm/test/CodeGen/AMDGPU/legalize-soffset-mbuf.ll
+++ b/llvm/test/CodeGen/AMDGPU/legalize-soffset-mbuf.ll
@@ -1236,37 +1236,3 @@ define void @llvm_amdgcn_raw_ptr_tbuffer_store_v4f32(<4 x float> %val, i32 %voff
   call void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32(<4 x float> %val, ptr addrspace(8) poison, i32 %voffset, i32 %soffset, i32 0, i32 0)
   ret void
 }
-
-declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 )
-declare float @llvm.amdgcn.raw.tbuffer.load.f32(<4 x i32>, i32, i32, i32, i32)
-declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32)
-declare <2 x float> @llvm.amdgcn.raw.tbuffer.load.v2f32(<4 x i32>, i32, i32, i32, i32)
-declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32)
-declare <3 x float> @llvm.amdgcn.raw.tbuffer.load.v3f32(<4 x i32>, i32, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32)
-declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32)
-declare void @llvm.amdgcn.raw.tbuffer.store.f32(float, <4 x i32>, i32, i32, i32, i32)
-declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32)
-declare void @llvm.amdgcn.raw.tbuffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32)
-declare void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32)
-declare void @llvm.amdgcn.raw.tbuffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32, i32)
-declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32)
-declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32)
-
-declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) nocapture, i32, i32, i32 )
-declare float @llvm.amdgcn.raw.ptr.tbuffer.load.f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
-declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) nocapture, i32, i32, i32)
-declare <2 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
-declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) nocapture, i32, i32, i32)
-declare <3 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) nocapture, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8) nocapture, i32, i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8) nocapture, i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.f32(float, ptr addrspace(8) nocapture, i32, i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8) nocapture, i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v2f32(<2 x float>, ptr addrspace(8) nocapture, i32, i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v3f32(<3 x float>, ptr addrspace(8) nocapture, i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f32(<3 x float>, ptr addrspace(8) nocapture, i32, i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8) nocapture, i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32(<4 x float>, ptr addrspace(8) nocapture, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/literal-constant-like-operand-instruction-size.ll b/llvm/test/CodeGen/AMDGPU/literal-constant-like-operand-instruction-size.ll
index 761e3ae6f9f31..85ea6a347a598 100644
--- a/llvm/test/CodeGen/AMDGPU/literal-constant-like-operand-instruction-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/literal-constant-like-operand-instruction-size.ll
@@ -2,7 +2,6 @@
 
 
 ; Restrict maximum branch to between +31 and -32 dwords
-declare void @llvm.amdgcn.s.sleep(i32) #0
 
 @name1 = external addrspace(1) global i32
 @name2 = external addrspace(1) global i32
diff --git a/llvm/test/CodeGen/AMDGPU/literals.ll b/llvm/test/CodeGen/AMDGPU/literals.ll
index 0e6400e83d486..413b74a95c494 100644
--- a/llvm/test/CodeGen/AMDGPU/literals.ll
+++ b/llvm/test/CodeGen/AMDGPU/literals.ll
@@ -59,6 +59,4 @@ entry:
   ret void
 }
 
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
-
 attributes #1 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.addrspacecast.nonnull.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.addrspacecast.nonnull.ll
index 265353675b349..e140a17807468 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.addrspacecast.nonnull.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.addrspacecast.nonnull.ll
@@ -57,13 +57,6 @@ define void @flat_to_private(ptr %ptr) {
   store volatile i32 7, ptr addrspace(5) %1, align 4
   ret void
 }
-
-declare ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3))
-declare ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5))
-declare ptr addrspace(3) @llvm.amdgcn.addrspacecast.nonnull.p3.p0(ptr)
-declare ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr)
-
-declare <4 x ptr> @llvm.amdgcn.addrspacecast.nonnull.v4p0.v4p3(<4 x ptr addrspace(3)>)
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; DAGISEL-ASM: {{.*}}
 ; GISEL-ASM: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll
index 8b16fef915a79..3ed594250a516 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i32 @llvm.amdgcn.alignbyte(i32, i32, i32) #0
-
 ; GCN-LABEL: {{^}}v_alignbyte_b32:
 ; GCN: v_alignbyte_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}
 define amdgpu_kernel void @v_alignbyte_b32(ptr addrspace(1) %out, i32 %src1, i32 %src2, i32 %src3) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll
index 9b63a8a3efcf9..e7915d39842cc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.cond.sub.ll
@@ -211,9 +211,6 @@ main_body:
   ret void
 }
 
-declare i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind "target-features"="+atomic-csub-no-rtn-insts" }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll
index 2b0584d39a3be..86933b8703510 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll
@@ -2,9 +2,6 @@
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,PREGFX12
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX12PLUS
 
-declare i32 @llvm.amdgcn.buffer.atomic.csub(i32, <4 x i32>, i32, i32, i1)
-declare i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1), i32)
-
 ; GCN-LABEL: {{^}}buffer_atomic_csub_rtn:
 ; PREGFX12: buffer_atomic_csub v0, v1, s[0:3], 0 idxen glc
 ; GFX12PLUS: buffer_atomic_sub_clamp_u32 v0, v1, s[0:3], null idxen th:TH_ATOMIC_RETURN
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
index 49e272bed9cef..1b002312ac92b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
@@ -2,16 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,CIVI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
 
-declare i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr nocapture, i32, i32, i32, i1) #2
-
-declare i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr nocapture, i64, i32, i32, i1) #2
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 ; GCN-LABEL: {{^}}lds_atomic_dec_ret_i32:
 ; CIVI-DAG: s_mov_b32 m0
 ; GFX9-NOT: m0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll
index 1e1bc2bbbd26a..72b911ec85406 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll
@@ -1,11 +1,6 @@
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s -check-prefix=GFX90A
 ; RUN: not --crash llc < %s -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908
 
-declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1)
-declare <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i1)
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)
-
 ; GFX908:  LLVM ERROR: Cannot select: {{.+}}: f32,ch = BUFFER_ATOMIC_FADD
 
 ; GFX90A-LABEL: {{^}}buffer_atomic_add_f32:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
index bd07dd137ac49..effdc7114d8f3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
@@ -1,12 +1,6 @@
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -amdgpu-atomic-optimizer-strategy=DPP | FileCheck %s -check-prefix=GCN
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -amdgpu-atomic-optimizer-strategy=DPP | FileCheck %s -check-prefix=GCN
 
-declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1)
-declare <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i1)
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
-declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)
-declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr, float)
-
 ; GCN-LABEL: {{^}}buffer_atomic_add_f32:
 ; GCN: buffer_atomic_add_f32 v0, v1, s[0:3], 0 idxen
 define amdgpu_ps void @buffer_atomic_add_f32(<4 x i32> inreg %rsrc, float %data, i32 %vindex) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
index 2ed61352f4592..b8a43d917f2f4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
@@ -2,16 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 
-declare i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr nocapture, i32, i32, i32, i1) #2
-
-declare i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr nocapture, i64, i32, i32, i1) #2
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 ; GCN-LABEL: {{^}}lds_atomic_inc_ret_i32:
 ; CIVI-DAG: s_mov_b32 m0
 ; GFX9-NOT: m0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
index 3ff34c0c2eaff..039402bbee032 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck %s
 
-declare i32 @llvm.amdgcn.ballot.i32(i1)
-declare i32 @llvm.ctpop.i32(i32)
-
 ; Test ballot(0)
 
 define amdgpu_cs i32 @constant_false() {
@@ -397,8 +394,6 @@ false:
   ret i32 33
 }
 
-declare i32 @llvm.amdgcn.icmp.i32(i1, i1, i32)
-
 define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_ne_zero_and(i32 %v1, i32 %v2) {
 ; CHECK-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
 ; CHECK:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
index 5a15dc53a292c..94e9cbed94886 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
@@ -1,9 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s
 
-declare i64 @llvm.amdgcn.ballot.i64(i1)
-declare i64 @llvm.ctpop.i64(i64)
-
 ; Test ballot(0)
 
 define amdgpu_cs i64 @constant_false() {
@@ -400,8 +397,6 @@ false:
   ret i32 33
 }
 
-declare i64 @llvm.amdgcn.icmp.i64(i1, i1, i32)
-
 define amdgpu_cs i32 @branch_divergent_simulated_negated_ballot_ne_zero_and(i32 %v1, i32 %v2) {
 ; CHECK-LABEL: branch_divergent_simulated_negated_ballot_ne_zero_and:
 ; CHECK:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll
index 5dbfdf24ef36f..6e2875d6e1e8f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll
@@ -4,9 +4,6 @@
 ; RUN: llc -mtriple=amdgcn -global-isel -mcpu=gfx1010 < %s | FileCheck %s --check-prefixes=CHECK,GISEL
 ; RUN: llc -mtriple=amdgcn -global-isel -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck %s --check-prefixes=CHECK,GISEL
 
-declare i64 @llvm.amdgcn.ballot.i64(i1)
-declare i64 @llvm.ctpop.i64(i64)
-
 ; Test ballot(0)
 
 define amdgpu_cs i64 @constant_false() {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitreplicate.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitreplicate.ll
index b3079f5c97871..3f78d779413ca 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitreplicate.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitreplicate.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -global-isel=1 -verify-machineinstrs < %s | FileCheck  -check-prefixes=GFX11 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -global-isel=0 -verify-machineinstrs < %s | FileCheck  -check-prefixes=GFX11 %s
 
-declare i64 @llvm.amdgcn.s.bitreplicate(i32)
-
 define i64 @test_s_bitreplicate_constant() {
 ; GFX11-LABEL: test_s_bitreplicate_constant:
 ; GFX11:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll
index eed648f167f39..2227d2c587178 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.atomic.ll
@@ -184,26 +184,4 @@ main_body:
   ret float %v.float
 }
 
-declare i32 @llvm.amdgcn.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.buffer.atomic.cmpswap(i32, i32, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.swap.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.add.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.sub.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.smin.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.umin.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.smax.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.umax.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.and.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.or.i64(i64, <4 x i32>, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.buffer.atomic.xor.i64(i64, <4 x i32>, i32, i32, i1) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.dwordx3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.dwordx3.ll
index fdbe6db8da14e..f66673d2212df 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.dwordx3.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.dwordx3.ll
@@ -81,14 +81,3 @@ main_body:
   ret <3 x float> %data
 }
 
-declare <3 x float> @llvm.amdgcn.buffer.load.format.v3f32(<4 x i32>, i32, i32, i1, i1) #0
-declare <3 x float> @llvm.amdgcn.buffer.load.v3f32(<4 x i32>, i32, i32, i1, i1) #0
-declare <3 x float> @llvm.amdgcn.raw.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32) #0
-declare <3 x float> @llvm.amdgcn.raw.buffer.load.v3f32(<4 x i32>, i32, i32, i32) #0
-declare <3 x float> @llvm.amdgcn.struct.buffer.load.format.v3f32(<4 x i32>, i32, i32, i32, i32) #0
-declare <3 x float> @llvm.amdgcn.struct.buffer.load.v3f32(<4 x i32>, i32, i32, i32, i32) #0
-declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32) #0
-declare <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32) #0
-declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <3 x float> @llvm.amdgcn.struct.ptr.buffer.load.v3f32(ptr addrspace(8), i32, i32, i32, i32) #0
-
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll
index 3d67dfdd674d4..0f7573e2ec0e9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.d16.ll
@@ -48,8 +48,3 @@ main_body:
   %elt = extractelement <4 x half> %data, i32 3
   ret half %elt
 }
-
-declare half @llvm.amdgcn.buffer.load.format.f16(<4 x i32>, i32, i32, i1, i1)
-declare <2 x half> @llvm.amdgcn.buffer.load.format.v2f16(<4 x i32>, i32, i32, i1, i1)
-declare <3 x half> @llvm.amdgcn.buffer.load.format.v3f16(<4 x i32>, i32, i32, i1, i1)
-declare <4 x half> @llvm.amdgcn.buffer.load.format.v4f16(<4 x i32>, i32, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
index 6851302fdcda3..95dfbd9a0fc51 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.format.ll
@@ -126,8 +126,4 @@ main_body:
   ret <2 x float> %data
 }
 
-declare float @llvm.amdgcn.buffer.load.format.f32(<4 x i32>, i32, i32, i1, i1) #0
-declare <2 x float> @llvm.amdgcn.buffer.load.format.v2f32(<4 x i32>, i32, i32, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
index a209dcfe3a7a0..d01f76506133c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll
@@ -466,11 +466,4 @@ define amdgpu_ps float @no_fold_fi_reg_soffset(<4 x i32> inreg %rsrc, i32 inreg
   ret float %ret.val
 }
 
-declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #0
-declare <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32>, i32, i32, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #0
-declare i8 @llvm.amdgcn.buffer.load.i8(<4 x i32>, i32, i32, i1, i1) #0
-declare i16 @llvm.amdgcn.buffer.load.i16(<4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.dwordx3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.dwordx3.ll
index 269956f948f50..4062556c0a8bf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.dwordx3.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.dwordx3.ll
@@ -71,14 +71,3 @@ main_body:
   call void @llvm.amdgcn.struct.ptr.buffer.store.v3f32(<3 x float> %1, ptr addrspace(8) %0, i32 0, i32 42, i32 0, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.format.v3f32(<3 x float>, <4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.format.v3f32(<3 x float>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v3f32(<3 x float>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v3f32(<3 x float>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v3f32(<3 x float>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v3f32(<3 x float>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v3f32(<3 x float>, ptr addrspace(8), i32, i32, i32, i32) #0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll
index a8cabdc9b0304..41060c1501de8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.d16.ll
@@ -56,8 +56,3 @@ main_body:
   call void @llvm.amdgcn.buffer.store.format.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %index, i32 0, i1 0, i1 0)
   ret void
 }
-
-declare void @llvm.amdgcn.buffer.store.format.f16(half, <4 x i32>, i32, i32, i1, i1)
-declare void @llvm.amdgcn.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i1, i1)
-declare void @llvm.amdgcn.buffer.store.format.v3f16(<3 x half>, <4 x i32>, i32, i32, i1, i1)
-declare void @llvm.amdgcn.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
index 41e2b4d0e5512..559004a8fb2e9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
@@ -95,10 +95,5 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.buffer.store.format.f32(float, <4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll
index 8b18848a62792..b5cd7b2308d5a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll
@@ -257,12 +257,5 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.i8(i8, <4 x i32>, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.buffer.store.i16(i16, <4 x i32>, i32, i32, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll
index bd1888b6965a1..808bc9daa231e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-declare void @llvm.amdgcn.buffer.wbinvl1() #0
-
 ; GCN-LABEL: {{^}}test_buffer_wbinvl1:
 ; GCN-NEXT: ; %bb.0:
 ; SI-NEXT: buffer_wbinvl1 ; encoding: [0x00,0x00,0xc4,0xe1,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll
index b937c42e14ed5..71385d90401bb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.sc.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=SI %s
 
-declare void @llvm.amdgcn.buffer.wbinvl1.sc() #0
-
 ; SI-LABEL: {{^}}test_buffer_wbinvl1_sc:
 ; SI-NEXT: ; %bb.0:
 ; SI-NEXT: buffer_wbinvl1_sc ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll
index 64ab8ecefd490..fb2ce5540e204 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-declare void @llvm.amdgcn.buffer.wbinvl1.vol() #0
-
 ; GCN-LABEL: {{^}}test_buffer_wbinvl1_vol:
 ; GCN-NEXT: ; %bb.0:
 ; CI: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xc0,0xe1,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll
index 89dbe9b0e17ca..36231461f4636 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll
@@ -1,8 +1,5 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
 
-declare half @llvm.fabs.f16(half %a)
-declare i1 @llvm.amdgcn.class.f16(half %a, i32 %b)
-
 ; GCN-LABEL: {{^}}class_f16:
 ; GCN-DAG: buffer_load_ushort v[[A_F16:[0-9]+]]
 ; GCN-DAG: buffer_load_dword v[[B_I32:[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll
index 27fb4e5f965c9..fbd81b0ba9591 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll
@@ -1,11 +1,5 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s
 
-declare i1 @llvm.amdgcn.class.f32(float, i32) #1
-declare i1 @llvm.amdgcn.class.f64(double, i32) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fabs.f32(float) #1
-declare double @llvm.fabs.f64(double) #1
-
 ; SI-LABEL: {{^}}test_class_f32:
 ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x13
 ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x1c
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.f16.ll
index a36f83f3f2bda..2cfbe903bef52 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.f16.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-declare half @llvm.amdgcn.cos.f16(half %a)
-
 ; GCN-LABEL: {{^}}cos_f16
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
 ; VI:  v_cos_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll
index 39952d4cd396f..7ff8e8cb19128 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare float @llvm.amdgcn.cos.f32(float) #0
-
 ; GCN-LABEL: {{^}}v_cos_f32:
 ; GCN: v_cos_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
 define amdgpu_kernel void @v_cos_f32(ptr addrspace(1) %out, float %src) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll
index c1e808cb3a088..67f85acc95ea8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare float @llvm.amdgcn.cubeid(float, float, float) #0
-
 ; GCN-LABEL: {{^}}test_cubeid:
 ; GCN: v_cubeid_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @test_cubeid(ptr addrspace(1) %out, float %a, float %b, float %c) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll
index 754f31c97e232..24ee12374a40d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare float @llvm.amdgcn.cubema(float, float, float) #0
-
 ; GCN-LABEL: {{^}}test_cubema:
 ; GCN: v_cubema_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @test_cubema(ptr addrspace(1) %out, float %a, float %b, float %c) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll
index 328665fe2c860..04913e171e38c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare float @llvm.amdgcn.cubesc(float, float, float) #0
-
 ; GCN-LABEL: {{^}}test_cubesc:
 ; GCN: v_cubesc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @test_cubesc(ptr addrspace(1) %out, float %a, float %b, float %c) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll
index 26af4117cbf90..a3e4e16e06673 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare float @llvm.amdgcn.cubetc(float, float, float) #0
-
 ; GCN-LABEL: {{^}}test_cubetc:
 ; GCN: v_cubetc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @test_cubetc(ptr addrspace(1) %out, float %a, float %b, float %c) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll
index e21d61036375a..027cecc6b469b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.dpp.ll
@@ -128,15 +128,5 @@ define amdgpu_cs void @test_cvt_sr_fp8_f32_byte2(i32 %a, i32 %r, i32 %old, ptr a
   ret void
 }
 
-declare float @llvm.amdgcn.cvt.f32.bf8(i32, i32)
-declare float @llvm.amdgcn.cvt.f32.fp8(i32, i32)
-declare i32 @llvm.amdgcn.cvt.pk.bf8.f32(float, float, i32, i1)
-declare i32 @llvm.amdgcn.cvt.pk.fp8.f32(float, float, i32, i1)
-declare i32 @llvm.amdgcn.cvt.sr.bf8.f32(float, i32, i32, i32)
-declare i32 @llvm.amdgcn.cvt.sr.fp8.f32(float, i32, i32, i32)
-
-declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1) #1
-declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) #1
-
 attributes #0 = { nounwind convergent }
 attributes #1 = { nounwind readnone convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
index 9b8fdf9017045..b7ffd61d553f5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.ll
@@ -4,15 +4,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
 
-declare float @llvm.amdgcn.cvt.f32.bf8(i32, i32)
-declare float @llvm.amdgcn.cvt.f32.fp8(i32, i32)
-declare <2 x float> @llvm.amdgcn.cvt.pk.f32.bf8(i32, i1)
-declare <2 x float> @llvm.amdgcn.cvt.pk.f32.fp8(i32, i1)
-declare i32 @llvm.amdgcn.cvt.pk.bf8.f32(float, float, i32, i1)
-declare i32 @llvm.amdgcn.cvt.pk.fp8.f32(float, float, i32, i1)
-declare i32 @llvm.amdgcn.cvt.sr.bf8.f32(float, i32, i32, i32)
-declare i32 @llvm.amdgcn.cvt.sr.fp8.f32(float, i32, i32, i32)
-
 define float @test_cvt_f32_bf8_byte0(i32 %a) {
 ; GFX940-LABEL: test_cvt_f32_bf8_byte0:
 ; GFX940:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.i16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.i16.ll
index ca7385be5dee7..0b8c3a414f59a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.i16.ll
@@ -75,9 +75,6 @@ define amdgpu_kernel void @v_cvt_pk_i16_i32_imm_reg(ptr addrspace(1) %out, ptr a
   ret void
 }
 
-declare <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32, i32) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.u16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.u16.ll
index b59e584418bd8..f7c1fbb219ca0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.u16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.u16.ll
@@ -75,9 +75,6 @@ define amdgpu_kernel void @v_cvt_pk_u16_u32_imm_reg(ptr addrspace(1) %out, ptr a
   ret void
 }
 
-declare <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32, i32) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.i16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.i16.ll
index 0093e30b03644..e2f01c71d1525 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.i16.ll
@@ -154,10 +154,6 @@ define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_fabs_lo_fneg_hi(ptr addrspa
   ret void
 }
 
-declare <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float, float) #1
-declare float @llvm.fabs.f32(float) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll
index d896090a47665..90cc437448c3d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll
@@ -154,10 +154,6 @@ define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_fabs_lo_fneg_hi(ptr addrspa
   ret void
 }
 
-declare <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float, float) #1
-declare float @llvm.fabs.f32(float) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll
index 920ff8a927e2d..34664c449a88e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll
@@ -810,10 +810,6 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(ptr addrsp
   ret void
 }
 
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare float @llvm.fabs.f32(float) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll
index f8a1388c9415e..b3d04c92b4858 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn--amdhsa < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -global-isel -mtriple=amdgcn--amdhsa < %s | FileCheck -check-prefix=GCN %s
 
-declare i64 @llvm.amdgcn.dispatch.id() #1
-
 ; GCN-LABEL: {{^}}dispatch_id:
 
 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s6
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
index 4fe6eed0ef1f3..0a5d6b9f0a4ff 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
@@ -28,8 +28,6 @@ define amdgpu_kernel void @test2(ptr addrspace(1) %out) {
   ret void
 }
 
-declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-
 attributes #0 = { readnone }
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll
index f1d3d562dda2c..4d66527bcdb21 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-declare half @llvm.amdgcn.div.fixup.f16(half %a, half %b, half %c)
-
 ; GCN-LABEL: {{^}}div_fixup_f16
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll
index 41eb4d2f9b4fa..d6e0a5bd8b8db 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll
@@ -1,9 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
 
-declare float @llvm.amdgcn.div.fixup.f32(float, float, float) nounwind readnone
-declare double @llvm.amdgcn.div.fixup.f64(double, double, double) nounwind readnone
-
 ; GCN-LABEL: {{^}}test_div_fixup_f32:
 ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x13
 ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x1c
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
index 76cff962f7c20..09e71dd0870a1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll
@@ -3,10 +3,6 @@
 
 ; FIXME: Enable for VI.
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) nounwind readnone
-declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) nounwind readnone
-
 ; GCN-LABEL: {{^}}test_div_fmas_f32:
 ; SI-DAG: s_load_dword [[SA:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x13
 ; SI-DAG: s_load_dword [[SB:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x1c
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
index 300c0032ac920..b2e0aae2f4acf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll
@@ -1,10 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1) #1
-declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1) #1
-declare float @llvm.fabs.f32(float) #1
-
 ; SI-LABEL: {{^}}test_div_scale_f32_1:
 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64
 ; SI-DAG: buffer_load_dword [[B:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll
index 8ea10f4496a2e..1634b7d725228 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dot4.f32.ll
@@ -326,10 +326,3 @@ entry:
   ret float %ret
 }
 
-declare float @llvm.amdgcn.dot4.f32.fp8.bf8(i32 %a, i32 %b, float %c)
-declare float @llvm.amdgcn.dot4.f32.bf8.fp8(i32 %a, i32 %b, float %c)
-declare float @llvm.amdgcn.dot4.f32.fp8.fp8(i32 %a, i32 %b, float %c)
-declare float @llvm.amdgcn.dot4.f32.bf8.bf8(i32 %a, i32 %b, float %c)
-
-declare float @llvm.fabs.f32(float %a)
-
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.add.gs.reg.rtn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.add.gs.reg.rtn.ll
index 29535726b7496..a498ec1003a2c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.add.gs.reg.rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.add.gs.reg.rtn.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
 
-declare i32 @llvm.amdgcn.ds.add.gs.reg.rtn.i32(i32, i32 immarg)
-declare i64 @llvm.amdgcn.ds.add.gs.reg.rtn.i64(i32, i32 immarg)
-
 define amdgpu_gs void @test_add_32(i32 %arg) {
 ; CHECK-LABEL: test_add_32:
 ; CHECK:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll
index 1a42145ab1d81..bb473909e8b92 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll
@@ -148,8 +148,5 @@ define amdgpu_kernel void @ds_append_lds_no_use(ptr addrspace(3) %lds, ptr addrs
   ret void
 }
 
-declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) nocapture, i1 immarg) #1
-declare i32 @llvm.amdgcn.ds.append.p2(ptr addrspace(2) nocapture, i1 immarg) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { argmemonly convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll
index 90e18a881340b..9803d265a80a5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
 
-declare i32 @llvm.amdgcn.ds.bpermute(i32, i32) #0
-
 ; CHECK-LABEL: {{^}}ds_bpermute:
 ; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @ds_bpermute(ptr addrspace(1) %out, i32 %index, i32 %src) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bvh.stack.rtn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bvh.stack.rtn.ll
index 0ce3f85425e0a..32a67027fd6f7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bvh.stack.rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bvh.stack.rtn.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
 
-declare { i32, i32 } @llvm.amdgcn.ds.bvh.stack.rtn(i32, i32, <4 x i32>, i32 immarg)
-
 define amdgpu_gs void @test_ds_bvh_stack(i32 %addr, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) {
 ; CHECK-LABEL: test_ds_bvh_stack:
 ; CHECK:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll
index 2694226ace9e7..0e4e8e0d9293c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll
@@ -144,8 +144,5 @@ define amdgpu_kernel void @ds_consume_lds_no_use(ptr addrspace(3) %lds, ptr addr
   ret void
 }
 
-declare i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) nocapture, i1 immarg) #1
-declare i32 @llvm.amdgcn.ds.consume.p2(ptr addrspace(2) nocapture, i1 immarg) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { argmemonly convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll
index 95e50da8a4709..66e34d16869c1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier-fastregalloc.ll
@@ -12,8 +12,5 @@ define amdgpu_kernel void @gws_barrier_offset0(i32 %val) #0 {
   ret void
 }
 
-
-declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { convergent inaccessiblememonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
index ad5e9f4eb6a63..9873105ddac9f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
@@ -238,10 +238,6 @@ define amdgpu_kernel void @gws_init_fence_barrier(i32 %val) #0 {
   ret void
 }
 
-declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #1
-declare void @llvm.amdgcn.ds.gws.init(i32, i32) #2
-declare i32 @llvm.amdgcn.workitem.id.x() #3
-
 attributes #0 = { nounwind }
 attributes #1 = { convergent inaccessiblememonly nounwind }
 attributes #2 = { convergent inaccessiblememonly nounwind writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
index f658ab39f771f..37cb73f9cf84f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
@@ -165,9 +165,6 @@ define amdgpu_kernel void @gws_init_wait_before(i32 %val, ptr addrspace(1) %ptr)
   ret void
 }
 
-declare void @llvm.amdgcn.ds.gws.init(i32, i32) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
 attributes #0 = { nounwind }
 attributes #1 = { convergent inaccessiblememonly nounwind writeonly }
 attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll
index da64f7350a921..8c5ec86bb4e4d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll
@@ -30,6 +30,4 @@ define amdgpu_kernel void @gws_sema_br_offset0(i32 %val) #0 {
   ret void
 }
 
-declare void @llvm.amdgcn.ds.gws.sema.br(i32, i32) #0
-
 attributes #0 = { convergent inaccessiblememonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll
index 180ea84dd7a74..11922078e0d04 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll
@@ -22,6 +22,4 @@ define amdgpu_kernel void @gws_sema_p_offset0(i32 %val) #0 {
   ret void
 }
 
-declare void @llvm.amdgcn.ds.gws.sema.p(i32) #0
-
 attributes #0 = { convergent inaccessiblememonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
index 818e23b5649d5..30a36b0c0dc1e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.release.all.ll
@@ -32,6 +32,4 @@ define amdgpu_kernel void @gws_sema_release_all_offset0(i32 %val) #0 {
   ret void
 }
 
-declare void @llvm.amdgcn.ds.gws.sema.release.all(i32) #0
-
 attributes #0 = { convergent inaccessiblememonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll
index 215c394409ac0..755bb7e2bacdc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll
@@ -28,6 +28,4 @@ define amdgpu_kernel void @gws_sema_v_offset0(i32 %val) #0 {
   ret void
 }
 
-declare void @llvm.amdgcn.ds.gws.sema.v(i32) #0
-
 attributes #0 = { convergent inaccessiblememonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll
index c35bb9f1a4a01..5e0b909d63d1b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll
@@ -22,5 +22,3 @@ define amdgpu_kernel void @ds_ordered_add_4dw(ptr addrspace(2) inreg %gds, ptr a
   store i32 %val, ptr addrspace(1) %out
   ret void
 }
-
-declare i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) nocapture, i32, i32, i32, i1, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
index 30a7235d6a702..db2454f9a1be6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
@@ -57,5 +57,3 @@ define amdgpu_gs float @ds_ordered_add_gs(ptr addrspace(2) inreg %gds) {
   %r = bitcast i32 %val to float
   ret float %r
 }
-
-declare i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) nocapture, i32, i32, i32, i1, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
index da537e9676ca9..ef0b2258d5a9c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
@@ -132,5 +132,3 @@ define amdgpu_gs float @ds_ordered_add_gs(ptr addrspace(2) inreg %gds) {
   %r = bitcast i32 %val to float
   ret float %r
 }
-
-declare i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) nocapture, i32, i32, i32, i1, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
index cee5bbbe85f48..aff8fc4896f8f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
@@ -45,5 +45,3 @@ endif:
   %r = bitcast i32 %v to float
   ret float %r
 }
-
-declare i32 @llvm.amdgcn.ds.ordered.swap(ptr addrspace(2) nocapture, i32, i32, i32, i1, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll
index 6581e251b416a..8eb1a7e445bfb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
 
-declare i32 @llvm.amdgcn.ds.permute(i32, i32) #0
-
 ; CHECK-LABEL: {{^}}ds_permute:
 ; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 define amdgpu_kernel void @ds_permute(ptr addrspace(1) %out, i32 %index, i32 %src) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.sub.gs.reg.rtn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.sub.gs.reg.rtn.ll
index 96c35df3c234f..62aea02c1490f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.sub.gs.reg.rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.sub.gs.reg.rtn.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s
 
-declare i32 @llvm.amdgcn.ds.sub.gs.reg.rtn.i32(i32, i32 immarg)
-declare i64 @llvm.amdgcn.ds.sub.gs.reg.rtn.i64(i32, i32 immarg)
-
 define amdgpu_gs void @test_sub_32(i32 %arg) {
 ; CHECK-LABEL: test_sub_32:
 ; CHECK:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll
index 038ba91c0d11b..7a096eb88eacd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
 
-declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #0
-
 ; CHECK-LABEL: {{^}}ds_swizzle:
 ; CHECK: ds_swizzle_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:swizzle(BITMASK_PERM,"00p11")
 define amdgpu_kernel void @ds_swizzle(ptr addrspace(1) %out, i32 %src) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll
index 3dbda351b5a17..9965e0fbfbc64 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.compr.ll
@@ -5,9 +5,6 @@
 
 ; ERR: error: <unknown>:0:0: in function test_export_compr_zeroes_v2f16 void (): intrinsic not supported on subtarget
 
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-declare void @llvm.amdgcn.exp.compr.v2i16(i32, i32, <2 x i16>, <2 x i16>, i1, i1) #0
-
 ; GCN-LABEL: {{^}}test_export_compr_zeroes_v2f16:
 ; GCN: exp mrt0 off, off, off, off compr{{$}}
 ; GCN: exp mrt0 off, off, off, off done compr{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
index 224de9512c493..bf4bc320da355 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
@@ -2,10 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX10,PREGFX11 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefixes=GCN,GFX11 %s
 
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
-declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
-declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
-
 ; GCN-LABEL: {{^}}test_export_zeroes_f32:
 ; GCN: exp mrt0 off, off, off, off{{$}}
 ; GCN: exp mrt0 off, off, off, off done{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll
index 52441bcb82f5c..d3cf0a4afca0d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.prim.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=PRIM %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=GCN -check-prefix=PRIM %s
 
-declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
-
 ; GCN-LABEL: {{^}}test_export_prim_i32:
 ; NOPRIM: exp invalid_target_20 v0, off, off, off done{{$}}
 ; PRIM: exp prim v0, off, off, off done{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll
index 3b64a8707b55e..9967c1c56a04b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.row.ll
@@ -2,10 +2,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-SDAG
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL
 
-declare void @llvm.amdgcn.exp.row.i32(i32, i32, i32, i32, i32, i32, i1, i32)
-declare void @llvm.amdgcn.exp.row.f32(i32, i32, float, float, float, float, i1, i32)
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 define amdgpu_kernel void @undef_i32() #0 {
 ; GFX11-LABEL: undef_i32:
 ; GFX11:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp2.ll
index 99a092e310abb..af60bc70881ef 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp2.ll
@@ -68,11 +68,6 @@ define half @v_fneg_fabs_exp2_f16(half %src)  {
   ret half %exp2
 }
 
-declare half @llvm.amdgcn.exp2.f16(half) #0
-declare float @llvm.amdgcn.exp2.f32(float) #0
-declare float @llvm.fabs.f32(float) #0
-declare half @llvm.fabs.f16(half) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
index 17b941c59fd3f..1f412057517e3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
@@ -5,13 +5,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL-GFX11 %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL-GFX10 %s
 
-declare i32 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
-declare i32 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
-declare float @llvm.fabs.f32(float) #0
-
-declare i32 @llvm.amdgcn.fcmp.f16(half, half, i32) #0
-declare half @llvm.fabs.f16(half) #0
-
 define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(ptr addrspace(1) %out, float %src, float %a) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_oeq_with_fabs:
 ; SDAG-GFX11:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
index ce055d6527996..051fd50c05da3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
@@ -6,13 +6,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-SDAG %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-GISEL %s
 
-declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
-declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
-declare float @llvm.fabs.f32(float) #0
-
-declare i64 @llvm.amdgcn.fcmp.f16(half, half, i32) #0
-declare half @llvm.fabs.f16(half) #0
-
 define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(ptr addrspace(1) %out, float %src, float %a) {
 ; GFX11-LABEL: v_fcmp_f32_oeq_with_fabs:
 ; GFX11:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll
index 212c286b3fe57..99435e1decff6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck %s
 
-declare float @llvm.amdgcn.fdiv.fast(float, float) #0
-
 ; CHECK-LABEL: {{^}}test_fdiv_fast:
 ; CHECK: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}, vcc
 ; CHECK: v_mul_f32_e32
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
index ca06a57be19cc..113a82d6ee666 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
@@ -3,8 +3,6 @@
 ; FIXME: GlobalIsel doesn't support BF16 for now.
 ; xUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11,GISEL-GFX11
 
-declare bfloat @llvm.amdgcn.fdot2.bf16.bf16(<2 x bfloat> %a, <2 x bfloat> %b, bfloat %c)
-
 define amdgpu_kernel void @test_llvm_amdgcn_fdot2_bf16_bf16(
 ; GFX11-LABEL: test_llvm_amdgcn_fdot2_bf16_bf16:
 ; GFX11:       ; %bb.0: ; %entry
@@ -93,5 +91,3 @@ entry:
   store bfloat %r.val, ptr addrspace(1) %r
   ret void
 }
-
-declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll
index 99c3deaada8c6..aa262071cfbf5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11,SDAG-GFX11
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11,GISEL-GFX11
 
-declare half @llvm.amdgcn.fdot2.f16.f16(<2 x half> %a, <2 x half> %b, half %c)
-
 define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f16_f16(
 ; GFX11-LABEL: test_llvm_amdgcn_fdot2_f16_f16:
 ; GFX11:       ; %bb.0: ; %entry
@@ -71,5 +69,3 @@ entry:
   store half %r.val, ptr addrspace(5) %r
   ret void
 }
-
-declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll
index e51b1d2da2e41..fa68e48509ab7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
 
-declare float @llvm.amdgcn.fdot2.f32.bf16(<2 x bfloat> %a, <2 x bfloat> %b, float %c, i1 %clamp)
-
 define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f32_bf16_clamp(
 ; GFX11-LABEL: test_llvm_amdgcn_fdot2_f32_bf16_clamp:
 ; GFX11:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
index 26e6bde97f499..ec54757e68ee4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
@@ -5,8 +5,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX12
 
-declare float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 %clamp)
-
 ; GCN-LABEL: {{^}}test_llvm_amdgcn_fdot2_clamp
 ; GFX9:   v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 ; GFX10:  v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
index 4b27208935633..470bf98b4658f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
@@ -103,6 +103,3 @@ define float @v_fma_const_const(float %a)  {
   %fma = call float @llvm.amdgcn.fma.legacy(float %a, float 2.0, float -1.0)
   ret float %fma
 }
-
-declare float @llvm.amdgcn.fma.legacy(float, float, float)
-declare float @llvm.fabs.f32(float)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
index 64c54caf99829..844cc6a998925 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX8 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 
-declare half @llvm.amdgcn.fmad.ftz.f16(half %a, half %b, half %c)
-
 ; GCN-LABEL: {{^}}mad_f16:
 ; GCN: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+$}}
 define amdgpu_kernel void @mad_f16(
@@ -109,5 +107,3 @@ define amdgpu_kernel void @mad_f16_neg_abs_b(
   store half %r.val, ptr addrspace(1) %r
   ret void
 }
-
-declare half @llvm.fabs.f16(half)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
index 4a735a727229b..3ef242e24c790 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
@@ -3,8 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
-declare float @llvm.amdgcn.fmad.ftz.f32(float %a, float %b, float %c)
-
 ; GCN-LABEL: {{^}}mad_f32:
 ; GCN:  v_ma{{[dc]}}_f32
 define amdgpu_kernel void @mad_f32(
@@ -112,5 +110,3 @@ define amdgpu_kernel void @mad_f32_neg_abs_b(
   store float %r.val, ptr addrspace(1) %r
   ret void
 }
-
-declare float @llvm.fabs.f32(float)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll
index 38608387b3a06..8178eafb60cc5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll
@@ -32,8 +32,5 @@ define amdgpu_kernel void @test_fmed3_srcmods_f16(ptr addrspace(1) %out, i32 %sr
   ret void
 }
 
-declare half @llvm.amdgcn.fmed3.f16(half, half, half) #0
-declare half @llvm.fabs.f16(half) #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
index 588b8c35a56ac..41d2ffcee28a9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
@@ -99,8 +99,5 @@ define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0_foldable_user(ptr addrspac
   ret void
 }
 
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #0
-declare float @llvm.fabs.f32(float) #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
index d318bc80e4976..fcc748e7a39ed 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
@@ -102,9 +102,6 @@ define amdgpu_kernel void @test_mad_legacy_fneg_f32(ptr addrspace(1) %out, float
   ret void
 }
 
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.amdgcn.fmul.legacy(float, float) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "denormal-fp-math"="preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.f16.ll
index 7354ed51d8869..6cd7158b2b95f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.f16.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-declare half @llvm.amdgcn.fract.f16(half %a)
-
 ; GCN-LABEL: {{^}}fract_f16
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
 ; VI:  v_fract_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
index 71cde1eece262..62df09ea3306e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
@@ -1,9 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare float @llvm.amdgcn.fract.f32(float) #0
-declare double @llvm.amdgcn.fract.f64(double) #0
-
 ; GCN-LABEL: {{^}}v_fract_f32:
 ; GCN: v_fract_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
 define amdgpu_kernel void @v_fract_f32(ptr addrspace(1) %out, float %src) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll
index 97eb86fb0ec9d..bc75d977e43f1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-declare i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a)
-
 ; GCN-LABEL: {{^}}frexp_exp_f16
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
 ; VI:  v_frexp_exp_i16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll
index 43f2a5ad73092..ba05c98c63ec7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll
@@ -1,12 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s  | FileCheck -check-prefix=GCN %s
 
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.copysign.f32(float, float) #0
-declare double @llvm.fabs.f64(double) #0
-declare i32 @llvm.amdgcn.frexp.exp.i32.f32(float) #0
-declare i32 @llvm.amdgcn.frexp.exp.i32.f64(double) #0
-
 ; GCN-LABEL: {{^}}s_test_frexp_exp_f32:
 ; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
 define amdgpu_kernel void @s_test_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.f16.ll
index 7085932137ac3..4855126140217 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.f16.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-declare half @llvm.amdgcn.frexp.mant.f16(half %a)
-
 ; GCN-LABEL: {{^}}frexp_mant_f16
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
 ; VI:  v_frexp_mant_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll
index a27034a852061..16b21db82ba53 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll
@@ -1,11 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s  | FileCheck -check-prefix=GCN %s
 
-declare float @llvm.fabs.f32(float) #0
-declare double @llvm.fabs.f64(double) #0
-declare float @llvm.amdgcn.frexp.mant.f32(float) #0
-declare double @llvm.amdgcn.frexp.mant.f64(double) #0
-
 ; GCN-LABEL: {{^}}s_test_frexp_mant_f32:
 ; GCN: v_frexp_mant_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
 define amdgpu_kernel void @s_test_frexp_mant_f32(ptr addrspace(1) %out, float %src) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.ordered.add.b64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.ordered.add.b64.ll
index 434fa1bf7b340..0185619bb1735 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.ordered.add.b64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.atomic.ordered.add.b64.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s
 
-declare i64 @llvm.amdgcn.global.atomic.ordered.add.b64(ptr addrspace(1), i64)
-
 define amdgpu_kernel void @global_atomic_ordered_add_b64_no_rtn(ptr addrspace(1) %addr, i64 %in) {
 ; GFX12-SDAG-LABEL: global_atomic_ordered_add_b64_no_rtn:
 ; GFX12-SDAG:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.ll
index 7362baf6bab95..29b9f19aaa1dd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.ll
@@ -5,8 +5,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX10
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX900-GISEL
 
-declare void @llvm.amdgcn.global.load.lds(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux)
-
 define amdgpu_ps void @global_load_lds_dword_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) {
 ; GFX900-LABEL: global_load_lds_dword_vaddr:
 ; GFX900:       ; %bb.0: ; %main_body
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll
index b4415c12926ac..ff4bdb8ba2143 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w32.ll
@@ -2,11 +2,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-SDAG-W32 %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-GISEL-W32 %s
 
-declare <2 x i32> @llvm.amdgcn.global.load.tr.v2i32.p1(ptr addrspace(1))
-declare <8 x i16> @llvm.amdgcn.global.load.tr.v8i16.p1(ptr addrspace(1))
-declare <8 x half> @llvm.amdgcn.global.load.tr.v8f16.p1(ptr addrspace(1))
-declare <8 x bfloat> @llvm.amdgcn.global.load.tr.v8bf16.p1(ptr addrspace(1))
-
 define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrspace(1) %use) {
 ; GFX12-SDAG-W32-LABEL: global_load_tr_b64:
 ; GFX12-SDAG-W32:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll
index 7ad1416789de7..54b660b555015 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.tr-w64.ll
@@ -2,11 +2,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-SDAG-W64 %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GFX12-GISEL-W64 %s
 
-declare i32 @llvm.amdgcn.global.load.tr.i32.p1(ptr addrspace(1))
-declare <4 x i16> @llvm.amdgcn.global.load.tr.v4i16.p1(ptr addrspace(1))
-declare <4 x half> @llvm.amdgcn.global.load.tr.v4f16.p1(ptr addrspace(1))
-declare <4 x bfloat> @llvm.amdgcn.global.load.tr.v4bf16.p1(ptr addrspace(1))
-
 define amdgpu_kernel void @global_load_tr_b64(ptr addrspace(1) %addr, ptr addrspace(1) %use) {
 ; GFX12-SDAG-W64-LABEL: global_load_tr_b64:
 ; GFX12-SDAG-W64:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll
index da4950f7b612c..dd0529e28f968 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll
@@ -66,8 +66,5 @@ define amdgpu_kernel void @large_groupstaticsize(ptr addrspace(1) %size, i32 %id
   ret void
 }
 
-declare i32 @llvm.amdgcn.groupstaticsize() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
index 309fd99031155..bf3839ce5359c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll
@@ -13,11 +13,6 @@
 
 ; ERR: warning: Instruction selection used fallback path for v_icmp_i1_ne0
 
-declare i32 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
-declare i32 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
-declare i32 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
-declare i32 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
-
 define amdgpu_kernel void @v_icmp_i32_eq(ptr addrspace(1) %out, i32 %src) {
 ; SDAG-GFX11-LABEL: v_icmp_i32_eq:
 ; SDAG-GFX11:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
index 5f979e0177f58..88538c872b932 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll
@@ -16,11 +16,6 @@
 
 ; ERR: warning: Instruction selection used fallback path for v_icmp_i1_ne0
 
-declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
-declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
-declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
-declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
-
 define amdgpu_kernel void @v_icmp_i32_eq(ptr addrspace(1) %out, i32 %src) {
 ; GFX11-LABEL: v_icmp_i32_eq:
 ; GFX11:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
index dba67a03c000e..06c4471120c00 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
@@ -283,10 +283,5 @@ entry:
   ret void
 }
 
-
-declare void @llvm.amdgcn.iglp.opt(i32) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32) #1
-
 attributes #0 = { nounwind "amdgpu-flat-work-group-size"="1,256" }
 attributes #1 = { convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll
index 31ba2f224bbae..65fb268588cb7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.dim.ll
@@ -1294,53 +1294,6 @@ main_body:
   ret <4 x float> %r
 }
 
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
-
-declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
-
-declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
-
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-
-declare float @llvm.amdgcn.image.load.1d.f32.i16(i32, i16, <8 x i32>, i32, i32) #1
-declare float @llvm.amdgcn.image.load.2d.f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32, i16, <8 x i32>, i32, i32) #1
-declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32, i16, <8 x i32>, i32, i32) #0
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll
index 7c8a395b488f3..e6b105c160b7e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.a16.encode.ll
@@ -1460,53 +1460,6 @@ main_body:
   ret <4 x float> %r
 }
 
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
-
-declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1darray.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
-
-declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #0
-
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-
-declare float @llvm.amdgcn.image.load.1d.f32.i16(i32, i16, <8 x i32>, i32, i32) #1
-declare float @llvm.amdgcn.image.load.2d.f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i16(i32, i16, <8 x i32>, i32, i32) #1
-declare void @llvm.amdgcn.image.store.1d.f32.i16(float, i32, i16, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1d.v2f32.i16(<2 x float>, i32, i16, <8 x i32>, i32, i32) #0
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
index f13b897971707..79e0f6b7e4155 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.ll
@@ -282,31 +282,6 @@ main_body:
   ret float %out
 }
 
-declare i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.sub.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.smin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.umin.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.smax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.umax.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.and.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.or.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.xor.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.inc.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.dec.1d.i32.i32(i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
-
-declare i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64, i32, <8 x i32>, i32, i32) #0
-declare i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64, i64, i32, <8 x i32>, i32, i32) #0
-
-declare i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.add.3d.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.add.cube.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.add.1darray.i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darray.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2dmsaa.i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare i32 @llvm.amdgcn.image.atomic.add.2darraymsaa.i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.flt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.flt.ll
index 71acac7922325..188bf5fd91d86 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.flt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.flt.ll
@@ -34,7 +34,3 @@ main_body:
   %v = call float @llvm.amdgcn.image.atomic.add.flt.1d.f32.f32(float %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
   ret float %v
 }
-
-declare float @llvm.amdgcn.image.atomic.add.flt.1d.f32.f32(float, i32, <8 x i32>, i32, i32)
-declare float @llvm.amdgcn.image.atomic.min.flt.1d.f32.f32(float, i32, <8 x i32>, i32, i32)
-declare float @llvm.amdgcn.image.atomic.max.flt.1d.f32.f32(float, i32, <8 x i32>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll
index 12f9029392a43..817aa5006d29e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.pk.add.ll
@@ -237,9 +237,3 @@ main_body:
   %unused = call <4 x bfloat> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v4bf16.v4bf16(<4 x bfloat> %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
   ret float 1.0
 }
-
-
-declare <2 x half> @llvm.amdgcn.image.atomic.pk.add.f16.1d.v2f16.v2f16(<2 x half>, i32, <8 x i32>, i32, i32)
-declare <4 x half> @llvm.amdgcn.image.atomic.pk.add.f16.1d.v4f16.v4f16(<4 x half>, i32, <8 x i32>, i32, i32)
-declare <2 x bfloat> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v2bf16.v2bf16(<2 x bfloat>, i32, <8 x i32>, i32, i32)
-declare <4 x bfloat> @llvm.amdgcn.image.atomic.pk.add.bf16.1d.v4bf16.v4bf16(<4 x bfloat>, i32, <8 x i32>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
index b4f05bce37680..969c1f5fd7021 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll
@@ -168,22 +168,6 @@ main_body:
   ret void
 }
 
-declare half @llvm.amdgcn.image.load.2d.f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-
-declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.3d.v2f16.i32(<2 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.3d.v3f16.i32(<3 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
index 38c1f10f2c011..5d8eac983c468 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
@@ -289,42 +289,6 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-
-declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-
-declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0
-
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
index db72e6c6d0c17..8e77c0b3b1fd7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
@@ -4319,70 +4319,6 @@ define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, ptr addrspace(3) %
   ret float %tex
 }
 
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<2 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v2f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {float,i32} @llvm.amdgcn.image.load.mip.2d.f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-
-declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-
-declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-declare <3 x float> @llvm.amdgcn.image.getresinfo.1d.v3f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <2 x float> @llvm.amdgcn.image.getresinfo.1d.v2f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare float @llvm.amdgcn.image.getresinfo.1d.f32.i32(i32 immarg, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
-
-declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
index 3f88ab1f64912..3dd3a7ccaf0af 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.a16.dim.ll
@@ -481,25 +481,6 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
index 3a5a60896ee24..fceddb945a488 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
@@ -22,8 +22,6 @@ main_body:
   ret <2 x float> %r
 }
 
-declare <4 x half> @llvm.amdgcn.image.gather4.b.2d.v4f16.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
index b5faae131d983..6c04951417624 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.dim.ll
@@ -190,26 +190,6 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare { <4 x float>, i32 } @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll
index e7a57d51a0dc0..de198c9ee5035 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.o.dim.ll
@@ -100,22 +100,6 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll
index fe65d6e104bb6..756dee1d73a07 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.getlod.dim.ll
@@ -36,8 +36,4 @@ main_body:
   ret <4 x float> %r
 }
 
-declare <4 x float> @llvm.amdgcn.image.getlod.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.image.getlod.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.image.getlod.2d.v2f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
-
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.2dmsaa.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.2dmsaa.ll
index 5ffdbb0f8c5b0..405f62b0ec940 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.2dmsaa.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.2dmsaa.ll
@@ -1225,16 +1225,4 @@ main_body:
   ret [4 x float] %i7
 }
 
-declare float @llvm.amdgcn.image.load.2dmsaa.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.image.load.2dmsaa.v2f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare <3 x float> @llvm.amdgcn.image.load.2dmsaa.v3f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-
-declare float @llvm.amdgcn.image.load.2darraymsaa.f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.image.load.2darraymsaa.v2f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare <3 x float> @llvm.amdgcn.image.load.2darraymsaa.v3f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-
-declare half @llvm.amdgcn.image.load.2dmsaa.f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-
-declare void @llvm.amdgcn.image.store.2dmsaa.f32.i32(float, i32, i32, i32, i32, <8 x i32>, i32, i32)
-
 attributes #0 = { nounwind readonly willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll
index 9a5d4855e8afc..c9fb5c639b650 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.d16.ll
@@ -147,9 +147,5 @@ main_body:
   ret <4 x half> %v
 }
 
-declare <4 x half> @llvm.amdgcn.image.load.1d.v4f16.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i16(i32, i16, i16, <8 x i32>, i32, i32) #2
-declare <4 x half> @llvm.amdgcn.image.load.3d.v4f16.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll
index 3e5a5243a69e4..0c7307119dd2e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll
@@ -147,9 +147,5 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
index 1348315e72e7b..91ee687f1f015 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
@@ -257,19 +257,6 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-
-declare <4 x half> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x half> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32.i16(i32, i16, i16, i16, i16, <8 x i32>, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll
index b5b5944fffda9..0a14aa9d12fc0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll
@@ -122,14 +122,5 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.msaa.load.x.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-
-declare float @llvm.amdgcn.image.msaa.load.x.2dmsaa.f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare {<2 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
index 4cd761b555d6e..ce096963a1ef3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.nsa.ll
@@ -151,15 +151,5 @@ define amdgpu_ps float @sample_def_undef_undef_undef(<8 x i32> inreg %rsrc, <4 x
   ret float %r
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32(i32, float, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare float @llvm.amdgcn.image.sample.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare float @llvm.rint.f32(float) #2
-declare float @llvm.amdgcn.image.sample.c.b.1darray.f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
-
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
index 42005499bded8..20df99075d541 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.a16.dim.ll
@@ -1619,53 +1619,6 @@ main_body:
   ret <2 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <8 x float> @llvm.amdgcn.image.sample.1d.v8f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32, half, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32, half, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.a16.dim.ll
index 6027d73418419..eddb8e7fac348 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.a16.dim.ll
@@ -176,15 +176,6 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.dim.ll
index 28a0611511662..351ecbe794d98 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.dim.ll
@@ -164,15 +164,6 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.encode.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.encode.ll
index 59f5a27087240..952a58484c7d4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.encode.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.encode.ll
@@ -99,15 +99,6 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.ll
index 0e8770f3da038..f187dd36f9875 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.g16.ll
@@ -99,15 +99,6 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
index 48491729f109a..bcf6e46a28b3d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
@@ -577,18 +577,6 @@ main_body:
   ret <4 x float> %r
 }
 
-declare half @llvm.amdgcn.image.sample.2d.f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {half,i32} @llvm.amdgcn.image.sample.2d.f16i32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <3 x half> @llvm.amdgcn.image.sample.2d.v3f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x half> @llvm.amdgcn.image.sample.2d.v4f16.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {<2 x half>,i32} @llvm.amdgcn.image.sample.2d.v2f16i32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x half> @llvm.amdgcn.image.sample.c.d.1d.v2f16.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {<2 x half>,i32} @llvm.amdgcn.image.sample.c.d.1d.v2f16i32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <3 x half> @llvm.amdgcn.image.sample.b.2d.v3f16.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {<3 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v3f16i32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x half> @llvm.amdgcn.image.sample.b.2d.v4f16.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {<4 x half>,i32} @llvm.amdgcn.image.sample.b.2d.v4f16i32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
index 46191c7b0440e..b28bc6d64d2d9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
@@ -81,10 +81,3 @@ main_body:
   %v1 = extractelement <4 x float> %v, i32 0
   ret float %v1
 }
-
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
index c8421c66f97c3..a76d11cd662cc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
@@ -2713,54 +2713,6 @@ main_body:
   ret <2 x float> %out
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {float, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll
index 42fa415b5da62..752c4dc5b7937 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll
@@ -429,28 +429,6 @@ main_body:
   ret <2 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32, float, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 define amdgpu_ps <4 x float> @sample_g16_noa16_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
 ; GFX10-LABEL: sample_g16_noa16_d_1d:
 ; GFX10:       ; %bb.0: ; %main_body
@@ -868,28 +846,6 @@ main_body:
   ret <2 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 define amdgpu_ps <4 x float> @sample_d_1d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
 ; GFX10-LABEL: sample_d_1d_g16_a16:
 ; GFX10:       ; %bb.0: ; %main_body
@@ -966,10 +922,6 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half,  half,  half, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half,  half,  half,  half,  half,  half, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half,  half,  half,  half,  half,  half,  half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll
index aa24ac394730e..839704721b042 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.encode.ll
@@ -310,19 +310,6 @@ main_body:
   ret <2 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll
index 449c4ebec889d..189097cd63320 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.ll
@@ -310,19 +310,6 @@ main_body:
   ret <2 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f16.f32.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll
index 67e6bb7e761f5..ad86c42b8d69c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.o.dim.ll
@@ -321,51 +321,6 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll
index 554b961beaf7b..6849f69de667f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.d16.ll
@@ -388,9 +388,5 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.image.store.1d.v4f16.i16(<4 x half>, i32, i16, <8 x i32>, i32, i32) #2
-declare void @llvm.amdgcn.image.store.2d.v4f16.i16(<4 x half>, i32, i16, i16, <8 x i32>, i32, i32) #2
-declare void @llvm.amdgcn.image.store.3d.v4f16.i16(<4 x half>, i32, i16, i16, i16, <8 x i32>, i32, i32) #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll
index 5d4c91ad8a510..41e54d883c63a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.store.a16.ll
@@ -376,9 +376,5 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.image.store.1d.v4f32.i16(<4 x float>, i32, i16, <8 x i32>, i32, i32) #2
-declare void @llvm.amdgcn.image.store.2d.v4f32.i16(<4 x float>, i32, i16, i16, <8 x i32>, i32, i32) #2
-declare void @llvm.amdgcn.image.store.3d.v4f32.i16(<4 x float>, i32, i16, i16, i16, <8 x i32>, i32, i32) #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.hsa.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.hsa.ll
index 77f57b0322711..ad2ea61a95f73 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.hsa.ll
@@ -16,7 +16,5 @@ define void @test_func(ptr addrspace(1) %out) #1 {
   ret void
 }
 
-declare ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() #0
-
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind  }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.ll
index 8598b78deccf5..85a31ae55dae8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.ptr.buffer.ll
@@ -27,7 +27,5 @@ define amdgpu_cs i32 @test_cs() #1 {
   ret i32 %value
 }
 
-declare ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() #0
-
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
index 70eff49450153..d50fe90144f50 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -307,9 +307,6 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>
 ; HSA-NEXT:    .kernarg_segment_size: 120
 ; HSA-LABEL:   .name:           kernel_implicitarg_no_struct_align_padding
 
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
-declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #2
-
 attributes #0 = { nounwind noinline }
 attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" }
 attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
index 3a540bdf5b53d..03123c0dabe2f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
@@ -199,7 +199,4 @@ endif:
   ret float %v
 }
 
-declare void @llvm.amdgcn.init.exec(i64) #1
-declare void @llvm.amdgcn.init.exec.from.input(i32, i32) #1
-
 attributes #1 = { convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll
index d3961c04838cf..2af4f173e00ec 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.wave32.ll
@@ -33,6 +33,3 @@ main_body:
   call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 8)
   ret float %s
 }
-
-declare void @llvm.amdgcn.init.exec(i64)
-declare void @llvm.amdgcn.init.exec.from.input(i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll
index 5d2e10756c372..cf2ab959ded87 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.f16.ll
@@ -179,9 +179,6 @@ main_body:
 }
 
 ; float @llvm.amdgcn.interp.p1.f16(i, attrchan, attr, high, m0)
-declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32) #0
 ; half @llvm.amdgcn.interp.p1.f16(p1, j, attrchan, attr, high, m0)
-declare half @llvm.amdgcn.interp.p2.f16(float, float, i32, i32, i1, i32) #0
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
 
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll
index 429528e9091d1..51984a1826a48 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll
@@ -167,13 +167,5 @@ main_body:
   ret half %res
 }
 
-declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.inreg.p10(float, float, float) #0
-declare float @llvm.amdgcn.interp.inreg.p2(float, float, float) #0
-declare float @llvm.amdgcn.interp.inreg.p10.f16(float, float, float, i1) #0
-declare half @llvm.amdgcn.interp.inreg.p2.f16(float, float, float, i1) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare void @llvm.amdgcn.exp.f16(i32, i32, float, float, float, float, i1, i1) #0
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
index ce5698c4a1386..c39638a91bd00 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
@@ -214,13 +214,5 @@ main_body:
   ret void
 }
 
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
index 8d0397c936a03..9d0c04a4d8a72 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
@@ -9,11 +9,6 @@
 ; uint4 llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(ulong node_ptr, float ray_extent, float3 ray_origin, float3 ray_dir, float3 ray_inv_dir, uint4 texture_descr)
 ; uint4 llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(ulong node_ptr, float ray_extent, float3 ray_origin, half3 ray_dir, half3 ray_inv_dir, uint4 texture_descr)
 
-declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32, float, <3 x float>, <3 x float>, <3 x float>, <4 x i32>)
-declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32, float, <3 x float>, <3 x half>, <3 x half>, <4 x i32>)
-declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64, float, <3 x float>, <3 x float>, <3 x float>, <4 x i32>)
-declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64, float, <3 x float>, <3 x half>, <3 x half>, <4 x i32>)
-
 ; ERR: in function image_bvh_intersect_ray{{.*}}intrinsic not supported on subtarget
 ; Arguments are flattened to represent the actual VGPR_A layout, so we have no
 ; extra moves in the generated kernel.
@@ -523,5 +518,3 @@ main_body:
   store <4 x i32> %v, ptr undef
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i32.ll
index 71ed71cd84bcd..4f8b288f6e553 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i32.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32,-wavefrontsize64 -global-isel=1 -verify-machineinstrs < %s | FileCheck  -check-prefixes=GFX11,GISEL %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32,-wavefrontsize64 -global-isel=0 -verify-machineinstrs < %s | FileCheck  -check-prefixes=GFX11,SDAG %s
 
-declare i1 @llvm.amdgcn.inverse.ballot(i32)
-
 ; Test ballot(0)
 define amdgpu_cs void @constant_false_inverse_ballot(ptr addrspace(1) %out) {
 ; GFX11-LABEL: constant_false_inverse_ballot:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll
index 2e3dc11feed1e..f07cf59622f8b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.inverse.ballot.i64.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=-wavefrontsize32,+wavefrontsize64 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=-wavefrontsize32,+wavefrontsize64 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG %s
 
-declare i1 @llvm.amdgcn.inverse.ballot.i64(i64)
-
 ; Test ballot(0)
 define amdgpu_cs void @constant_false_inverse_ballot(ptr addrspace(1) %out) {
 ; GISEL-LABEL: constant_false_inverse_ballot:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
index bc10eb68d75cb..f8112dbe34e55 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll
@@ -49,9 +49,6 @@ bb1:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i1 @llvm.amdgcn.is.private(ptr nocapture) #0
-
 attributes #0 = { nounwind readnone speculatable }
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
index aad4d924952ff..3a44426a4e667 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll
@@ -48,9 +48,6 @@ bb1:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0
-
 attributes #0 = { nounwind readnone speculatable }
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
index 8dba22312ac88..a1a11e9ed9a1b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -116,9 +116,6 @@ define ptr addrspace(4) @func_kernarg_segment_ptr() {
   ret ptr addrspace(4) %ptr
 }
 
-declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind "amdgpu-implicitarg-num-bytes"="0" }
 attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
index 8f2bdff016345..7dd045247dd7e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kill.ll
@@ -318,9 +318,4 @@ bb35:                                             ; preds = %bb33, %.entry
   ret void
 }
 
-declare void @llvm.amdgcn.kill(i1) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
-declare i1 @llvm.amdgcn.wqm.vote(i1)
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
index 6d1ca3fddce5e..957d05d4fc835 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.direct.load.ll
@@ -45,8 +45,5 @@ main_body:
   ret void
 }
 
-declare float @llvm.amdgcn.lds.direct.load(i32) #1
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32)
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll
index 61818dafd2b84..920202736f557 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll
@@ -1,9 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i32 @llvm.amdgcn.lds.kernel.id()
-declare i32 @llvm.amdgcn.workgroup.id.x()
-
 define void @function_lds_id(ptr addrspace(1) %out) {
 ; GCN-LABEL: function_lds_id:
 ; GCN:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
index 924d9eb7e6c29..55e5ecf1fe765 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.param.load.ll
@@ -49,8 +49,5 @@ main_body:
   ret void
 }
 
-declare float @llvm.amdgcn.lds.param.load(i32, i32, i32) #1
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32)
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
index 9a2715b2ebc20..3987c57ae9ebf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i32 @llvm.amdgcn.lerp(i32, i32, i32) #0
-
 ; GCN-LABEL: {{^}}v_lerp:
 ; GCN: v_lerp_u8 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
 define amdgpu_kernel void @v_lerp(ptr addrspace(1) %out, i32 %src) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll
index b0a2d10eebafe..5967f9db6853a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll
@@ -3,8 +3,6 @@
 
 ; ERR: intrinsic not supported on subtarget
 
-declare float @llvm.amdgcn.log.clamp.f32(float) #0
-
 ; GCN-LABEL: {{^}}v_log_clamp_f32:
 ; GCN: v_log_clamp_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
 define amdgpu_kernel void @v_log_clamp_f32(ptr addrspace(1) %out, float %src) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.ll
index ebdda381a0762..42ff4f5d62d5d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.ll
@@ -68,11 +68,6 @@ define half @v_fneg_fabs_log_f16(half %src)  {
   ret half %log
 }
 
-declare half @llvm.amdgcn.log.f16(half) #0
-declare float @llvm.amdgcn.log.f32(float) #0
-declare float @llvm.fabs.f32(float) #0
-declare half @llvm.fabs.f16(half) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.make.buffer.rsrc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.make.buffer.rsrc.ll
index 18c977bf5100d..d213b053f38cd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.make.buffer.rsrc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.make.buffer.rsrc.ll
@@ -156,8 +156,3 @@ define amdgpu_ps float @general_case_load_with_waterfall(ptr %p, i16 %stride, i3
   %value = call float @llvm.amdgcn.struct.ptr.buffer.load(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
   ret float %value
 }
-
-declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr nocapture readnone, i16, i32, i32)
-declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) nocapture readnone, i16, i32, i32)
-declare float @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8) nocapture readonly, i32, i32, i32 immarg)
-declare float @llvm.amdgcn.struct.ptr.buffer.load(ptr addrspace(8) nocapture readonly, i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
index 4ac9dc8565ff4..3a68833dc24c4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll
@@ -87,9 +87,5 @@ define i32 @mbcnt_hi_known_bits_4(i32 %x) #0 {
   ret i32 %mask
 }
 
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll
index 6b6fb30da3786..eb27fc528417e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll
@@ -2,13 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX908 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s
 
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16>, <2 x i16>, <32 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16>, <2 x i16>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16>, <2 x i16>, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16>, <2 x i16>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16>, <2 x i16>, <4 x float>, i32, i32, i32)
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x2bf16:
 ; GCN-DAG:         v_mov_b32_e32 [[TWO:v[0-9]+]], 2
 ; GCN-DAG:         v_mov_b32_e32 [[ONE:v[0-9]+]], 1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
index 8a460154e4789..9254033515392 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll
@@ -1,15 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940 %s
 
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16>, <4 x i16>, <32 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16>, <4 x i16>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x4bf16.1k(<4 x i16>, <4 x i16>, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x8bf16.1k(<4 x i16>, <4 x i16>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x16bf16.1k(<4 x i16>, <4 x i16>, <4 x float>, i32, i32, i32)
-declare <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double, double, <4 x double>, i32, i32, i32)
-declare double @llvm.amdgcn.mfma.f64.4x4x4f64(double, double, double, i32, i32, i32)
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x4bf16_1k:
 ; GCN-DAG:     s_load_dwordx16
 ; GCN-DAG:     s_load_dwordx16
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll
index 702e513aff4c7..84fe9f019bcca 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll
@@ -3,33 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940,AGPRCD %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL,AGPRCD %s
 
-declare <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64, i64, <4 x i32>, i32, i32, i32)
-declare <16 x i32> @llvm.amdgcn.mfma.i32.32x32x16.i8(i64, i64, <16 x i32>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float>, <2 x float>, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float>, <2 x float>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.bf8(i64, i64, <4 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.fp8(i64, i64, <4 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.bf8(i64, i64, <4 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.fp8(i64, i64, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.bf8(i64, i64, <16 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.fp8(i64, i64, <16 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.bf8(i64, i64, <16 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64, i64, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.f16(<4 x half>, <8 x half>, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.f16(<4 x half>, <8 x half>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.bf16(<4 x i16>, <8 x i16>, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.bf16(<4 x i16>, <8 x i16>, <16 x float>, i32, i32, i32)
-declare <4 x i32> @llvm.amdgcn.smfmac.i32.16x16x64.i8(<2 x i32>, <4 x i32>, <4 x i32>, i32, i32, i32)
-declare <16 x i32> @llvm.amdgcn.smfmac.i32.32x32x32.i8(<2 x i32>, <4 x i32>, <16 x i32>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf8.bf8(<2 x i32>, <4 x i32>, <4 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf8.fp8(<2 x i32>, <4 x i32>, <4 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.fp8.bf8(<2 x i32>, <4 x i32>, <4 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.fp8.fp8(<2 x i32>, <4 x i32>, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf8.bf8(<2 x i32>, <4 x i32>, <16 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf8.fp8(<2 x i32>, <4 x i32>, <16 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.bf8(<2 x i32>, <4 x i32>, <16 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.fp8(<2 x i32>, <4 x i32>, <16 x float>, i32, i32, i32)
-
 ; GCN-LABEL: {{^}}test_mfma_i32_16x16x32i8:
 ; GFX940-DAG:  v_mov_b32_e32 v[[ONE:[0-9]+]], 1
 ; GFX940-DAG:  v_mov_b32_e32 v[[TWO:[0-9]+]], 2
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll
index d5ccc288f6a00..d5207dd108ac6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX908 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s
 
-declare <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32, i32, <16 x i32>, i32, i32, i32)
-declare <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32, i32, <4 x i32>, i32, i32, i32)
-
 ; GCN-LABEL: {{^}}test_mfma_i32_32x32x8i8:
 ; GCN-DAG:         v_mov_b32_e32 [[TWO:v[0-9]+]], 2
 ; GCN-DAG:         v_mov_b32_e32 [[ONE:v[0-9]+]], 1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
index 54023770ed0ff..4e8135a9830e7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
@@ -3,21 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A,GFX908_A,GFX90A_40 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940,GFX90A_40 %s
 
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float, float, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float, float, <4 x float>, i32, i32, i32)
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half>, <4 x half>, <32 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half>, <4 x half>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half>, <4 x half>, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half>, <4 x half>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half>, <4 x half>, <4 x float>, i32, i32, i32)
-declare <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32, i32, <32 x i32>, i32, i32, i32)
-declare <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32, i32, <16 x i32>, i32, i32, i32)
-declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32:
 ; GCN-DAG:        v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
 ; GCN-DAG:        v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
index 96975bd93558e..7b30b48fb49b1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
@@ -91,8 +91,5 @@ define amdgpu_kernel void @mov_dpp64_imm_test(ptr addrspace(1) %out) {
   ret void
 }
 
-declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.mov.dpp.i64(i64, i32, i32, i32, i1) #0
-
 attributes #0 = { nounwind readnone convergent }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll
index 8bff17b729927..0c4ad1c5d8b4b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll
@@ -24,6 +24,4 @@ define amdgpu_kernel void @dpp8_wait_states(ptr addrspace(1) %out, i32 %in) {
   ret void
 }
 
-declare i32 @llvm.amdgcn.mov.dpp8.i32(i32, i32) #0
-
 attributes #0 = { nounwind readnone convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
index 3a5519a90e191..d786910cd8253 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0
-
 ; GCN-LABEL: {{^}}v_mqsad_pk_u16_u8:
 ; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
 ; GCN-DAG: v_mov_b32_e32 v5, v1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
index 9e6a161ab7aba..d87f9b3f641e7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) #0
-
 ; GCN-LABEL: {{^}}v_mqsad_u32_u8_inline_integer_immediate:
 ; GCN-DAG: v_mov_b32_e32 v0, v2
 ; GCN-DAG: v_mov_b32_e32 v1, v3
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll
index 63d71a1e34949..06aa8c678bde7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i32 @llvm.amdgcn.msad.u8(i32, i32, i32) #0
-
 ; GCN-LABEL: {{^}}v_msad_u8:
 ; GCN: v_msad_u8 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
 define amdgpu_kernel void @v_msad_u8(ptr addrspace(1) %out, i32 %src) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll
index 9944352f07dd9..3c5b7f9206cd1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll
@@ -8,7 +8,5 @@ define amdgpu_kernel void @test_mul_i24(ptr addrspace(1) %out, i32 %src1, i32 %s
   ret void
 }
 
-declare i32 @llvm.amdgcn.mul.i24(i32, i32) #0
-
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll
index 6768475b1460b..0f0ee288232cd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll
@@ -8,7 +8,5 @@ define amdgpu_kernel void @test_mul_u24(ptr addrspace(1) %out, i32 %src1, i32 %s
   ret void
 }
 
-declare i32 @llvm.amdgcn.mul.u24(i32, i32) #0
-
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.i24.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.i24.ll
index 5a3767381cff6..40af0e045a3f6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.i24.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.i24.ll
@@ -11,5 +11,3 @@ define i32 @basic(i32 %a, i32 %b) {
   %mul = call i32 @llvm.amdgcn.mulhi.i24(i32 %a, i32 %b)
   ret i32 %mul
 }
-
-declare i32 @llvm.amdgcn.mulhi.i24(i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.u24.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.u24.ll
index db325a2d237a6..7dfba368ba776 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.u24.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mulhi.u24.ll
@@ -11,5 +11,3 @@ define i32 @basic(i32 %a, i32 %b) {
   %mul = call i32 @llvm.amdgcn.mulhi.u24(i32 %a, i32 %b)
   ret i32 %mul
 }
-
-declare i32 @llvm.amdgcn.mulhi.u24(i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
index f5f51f61eafe1..81e24a4f3529a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i32 @llvm.amdgcn.perm(i32, i32, i32) #0
-
 ; GCN-LABEL: {{^}}v_perm_b32_v_v_v:
 ; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, v2
 define amdgpu_ps void @v_perm_b32_v_v_v(i32 %src1, i32 %src2, i32 %src3, ptr addrspace(1) %out) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
index 265d64f47bb23..e5cae7e00cc47 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll
@@ -6,11 +6,6 @@
 ; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
 ; RUN: llc -global-isel=1 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s
 
-declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1, i1)
-declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1, i1)
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.amdgcn.workitem.id.y()
-
 define amdgpu_kernel void @v_permlane16_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
 ; GFX10-LABEL: v_permlane16_b32_vss:
 ; GFX10:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.var.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.var.ll
index 973678291e263..fd589fd5594f5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.var.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane16.var.ll
@@ -2,11 +2,6 @@
 ; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
 ; RUN: llc -global-isel=1 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s
 
-declare i32 @llvm.amdgcn.permlane16.var(i32, i32, i32, i1, i1)
-declare i32 @llvm.amdgcn.permlanex16.var(i32, i32, i32, i1, i1)
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.amdgcn.workitem.id.y()
-
 define amdgpu_kernel void @v_permlane16var_b32_vv(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
 ; GFX12-SDAG-LABEL: v_permlane16var_b32_vv:
 ; GFX12-SDAG:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll
index b81cb97725648..06425dcf2d812 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
 
-declare i32 @llvm.amdgcn.permlane64(i32)
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 define amdgpu_kernel void @test_s(ptr addrspace(1) %out, i32 %src0) {
 ; GFX11-LABEL: test_s:
 ; GFX11:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
index 955d8ae5cc054..004e44d4b22ea 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
@@ -56,9 +56,6 @@ end:
   ret float %r
 }
 
-declare i1 @llvm.amdgcn.ps.live() #1
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
index f81be1a0308d0..1394fb70432c3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0
-
 ; GCN-LABEL: {{^}}v_qsad_pk_u16_u8:
 ; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
 ; GCN-DAG: v_mov_b32_e32 v5, v1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
index c951a07a44ae7..8c35e10105cc2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.quadmask.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -global-isel=1 -verify-machineinstrs < %s | FileCheck  -check-prefixes=GFX11 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -global-isel=0 -verify-machineinstrs < %s | FileCheck  -check-prefixes=GFX11 %s
 
-declare i32 @llvm.amdgcn.s.quadmask.i32(i32)
-declare i64 @llvm.amdgcn.s.quadmask.i64(i64)
-
 define i32 @test_quadmask_constant_zero_i32() {
 ; GFX11-LABEL: test_quadmask_constant_zero_i32:
 ; GFX11:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll
index 36d2319788713..a4165f503a670 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll
@@ -13,8 +13,6 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out) {
   ret void
 }
 
-declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
-
 attributes #0 = { nounwind readnone }
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll
index 5c917c97e261f..387dc66013062 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.fadd.ll
@@ -66,7 +66,4 @@ define amdgpu_ps void @raw_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgp
   ret void
 }
 
-declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll
index 4d80e4ce5af14..45c962842f5f7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.atomic.ll
@@ -129,20 +129,4 @@ main_body:
   ret float %out
 }
 
-declare i32 @llvm.amdgcn.raw.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare float @llvm.amdgcn.raw.buffer.atomic.swap.f32(float, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.inc.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.dec.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32) #0
-declare i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64, i64, <4 x i32>, i32, i32, i32) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.d16.ll
index 8e064ab97d1aa..de8c279587278 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.d16.ll
@@ -47,8 +47,3 @@ main_body:
   %elt = extractelement <4 x half> %data, i32 3
   ret half %elt
 }
-
-declare half @llvm.amdgcn.raw.buffer.load.format.f16(<4 x i32>, i32, i32, i32)
-declare <2 x half> @llvm.amdgcn.raw.buffer.load.format.v2f16(<4 x i32>, i32, i32, i32)
-declare <3 x half> @llvm.amdgcn.raw.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32)
-declare <4 x half> @llvm.amdgcn.raw.buffer.load.format.v4f16(<4 x i32>, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.ll
index 60c6268e448cb..9ecc7178d3133 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.format.ll
@@ -80,8 +80,4 @@ main_body:
   ret <2 x float> %data
 }
 
-declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.raw.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.lds.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.lds.ll
index ffd055e973ed0..abef17c45a94b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.lds.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN
 
-declare void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)
-
 define amdgpu_ps float @buffer_load_lds_dword(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) {
 ; GCN-LABEL: buffer_load_lds_dword:
 ; GCN:       ; %bb.0: ; %main_body
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
index 0c6bba2426947..84e59c16e8799 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
@@ -1378,19 +1378,4 @@ main_body:
   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
   ret void
 }
-
-declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) #0
-declare <2 x i32> @llvm.amdgcn.raw.buffer.load.v2i32(<4 x i32>, i32, i32, i32) #0
-declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare i8 @llvm.amdgcn.raw.buffer.load.i8(<4 x i32>, i32, i32, i32) #0
-declare i16 @llvm.amdgcn.raw.buffer.load.i16(<4 x i32>, i32, i32, i32) #0
-declare <2 x i16> @llvm.amdgcn.raw.buffer.load.v2i16(<4 x i32>, i32, i32, i32) #0
-declare <4 x i16> @llvm.amdgcn.raw.buffer.load.v4i16(<4 x i32>, i32, i32, i32) #0
-declare half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32>, i32, i32, i32) #0
-declare <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32>, i32, i32, i32) #0
-declare <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32>, i32, i32, i32) #0
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
index ca03d954d5b2a..1d64ef823fb1b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.d16.ll
@@ -74,8 +74,3 @@ main_body:
   call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %voffset, i32 0, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.buffer.store.format.f16(half, <4 x i32>, i32, i32, i32)
-declare void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i32)
-declare void @llvm.amdgcn.raw.buffer.store.format.v3f16(<3 x half>, <4 x i32>, i32, i32, i32)
-declare void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.ll
index 2fe162cb56e6f..1dbec62fbbdb1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.format.ll
@@ -67,10 +67,5 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.raw.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.ll
index 75da1adc3123c..c3845f22fced5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.ll
@@ -575,20 +575,5 @@ define amdgpu_ps void @raw_buffer_store_x1_offset_swizzled_not_merged(<4 x i32>
   ret void
 }
 
-declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1
-declare void @llvm.amdgcn.raw.buffer.store.i8(i8, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.f16(half, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.i16(i16, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v2i16(<2 x i16>, <4 x i32>, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.buffer.store.v4i16(<4 x i16>, <4 x i32>, i32, i32, i32) #0
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
index ccb3c4fa19e82..9500dfcf281b3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.ll
@@ -66,7 +66,4 @@ define amdgpu_ps void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc_
   ret void
 }
 
-declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll
index 9c9cf36baecf1..4783c069dba5e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.ll
@@ -149,20 +149,4 @@ main_body:
   ret void
 }
 
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare float @llvm.amdgcn.raw.ptr.buffer.atomic.swap.f32(float, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.inc.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.dec.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32) #0
-declare i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.format.d16.ll
index c27118446cc2f..4bc7ea3395c85 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.format.d16.ll
@@ -47,8 +47,3 @@ main_body:
   %elt = extractelement <4 x half> %data, i32 3
   ret half %elt
 }
-
-declare half @llvm.amdgcn.raw.ptr.buffer.load.format.f16(ptr addrspace(8), i32, i32, i32)
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f16(ptr addrspace(8), i32, i32, i32)
-declare <3 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32)
-declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.format.ll
index 81aee1e9c4488..d16ffd257dbb8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.format.ll
@@ -80,8 +80,4 @@ main_body:
   ret <2 x float> %data
 }
 
-declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.ll
index 51a8b97ace9f5..0389e9b9b4aca 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.lds.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN
 
-declare void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)
-
 define amdgpu_ps float @buffer_load_lds_dword(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
 ; GCN-LABEL: buffer_load_lds_dword:
 ; GCN:       ; %bb.0: ; %main_body
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll
index 04d221325a591..c7649112cc0e3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll
@@ -1125,19 +1125,4 @@ main_body:
   call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
   ret void
 }
-
-declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #0
-declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32) #0
-declare <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8), i32, i32, i32) #0
-declare <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32) #0
-declare i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32) #0
-declare <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8), i32, i32, i32) #0
-declare <4 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v4i16(ptr addrspace(8), i32, i32, i32) #0
-declare half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32) #0
-declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32) #0
-declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32) #0
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
index 8e278c101057a..94c5d4540ab01 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.d16.ll
@@ -74,8 +74,3 @@ main_body:
   call void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f16(<4 x half> %data, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.f16(half, ptr addrspace(8), i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v3f16(<3 x half>, ptr addrspace(8), i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.ll
index d2c9b4b73f72e..714b1c4cc161a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.format.ll
@@ -67,10 +67,5 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.f32(float, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll
index 1251d2e3bba55..9a31eb3b00ab6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll
@@ -307,20 +307,5 @@ define amdgpu_ps void @raw_ptr_buffer_store_x1_offset_swizzled_not_merged(ptr ad
   ret void
 }
 
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #1
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f16(half, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i16(<2 x i16>, ptr addrspace(8), i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i16(<4 x i16>, ptr addrspace(8), i32, i32, i32) #0
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.load.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.load.d16.ll
index cc75531e4953e..06820c9d1fd07 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.load.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.load.d16.ll
@@ -135,8 +135,3 @@ main_body:
   %elt = extractelement <4 x half> %data, i32 3
   ret half %elt
 }
-
-declare half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32, i32)
-declare <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32)
-declare <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32)
-declare <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.load.ll
index 8bac51994cc48..de9020bef5ce5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.load.ll
@@ -331,8 +331,3 @@ define amdgpu_vs float @buffer_load_x(ptr addrspace(8) inreg %rsrc) {
     %vdata.f = bitcast i32 %vdata to float
     ret float %vdata.f
 }
-
-declare i32 @llvm.amdgcn.raw.ptr.tbuffer.load.i32(ptr addrspace(8), i32, i32, i32, i32)
-declare <2 x i32> @llvm.amdgcn.raw.ptr.tbuffer.load.v2i32(ptr addrspace(8), i32, i32, i32, i32)
-declare <4 x i32> @llvm.amdgcn.raw.ptr.tbuffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll
index cb511c93f67ed..a53b5bbc41956 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.d16.ll
@@ -209,8 +209,3 @@ main_body:
   call void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f16(<4 x half> %data, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 33, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.f16(half, ptr addrspace(8), i32, i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v3f16(<3 x half>, ptr addrspace(8), i32, i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.ll
index 22396631b38df..3ac53ae2e966f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.tbuffer.store.ll
@@ -278,10 +278,5 @@ main_body:
   call void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32(<4 x float> %data, ptr addrspace(8) %rsrc, i32 16777212, i32 0, i32 63, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) #0
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll
index bd7e9d2dfa32d..0b0881e700480 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.d16.ll
@@ -163,8 +163,3 @@ main_body:
   %elt = extractelement <4 x half> %data, i32 3
   ret half %elt
 }
-
-declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32)
-declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32)
-declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32)
-declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll
index 73a27dc62695c..cf1a1fd3f583a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll
@@ -414,8 +414,3 @@ define amdgpu_vs float @buffer_load_x(<4 x i32> inreg %rsrc) {
     %vdata.f = bitcast i32 %vdata to float
     ret float %vdata.f
 }
-
-declare i32 @llvm.amdgcn.raw.tbuffer.load.i32(<4 x i32>, i32, i32, i32, i32)
-declare <2 x i32> @llvm.amdgcn.raw.tbuffer.load.v2i32(<4 x i32>, i32, i32, i32, i32)
-declare <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
index 01df7634f0e9c..ad9baa7ccf5c1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.d16.ll
@@ -277,8 +277,3 @@ main_body:
   call void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 0, i32 0, i32 33, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32, i32)
-declare void @llvm.amdgcn.raw.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32)
-declare void @llvm.amdgcn.raw.tbuffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32)
-declare void @llvm.amdgcn.raw.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
index 8641bf1b03f36..38363538d4229 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.store.ll
@@ -362,10 +362,5 @@ main_body:
   call void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float> %data, <4 x i32> %rsrc, i32 16777212, i32 0, i32 63, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.raw.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.f16.ll
index b78ea1033baac..8e543833ff37f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.f16.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-declare half @llvm.amdgcn.rcp.f16(half %a)
-
 ; GCN-LABEL: {{^}}rcp_f16
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
 ; VI:  v_rcp_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll
index 4f4e9482e6ced..79a56dd1a85e6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll
@@ -3,8 +3,6 @@
 
 ; ERROR: error: <unknown>:0:0: in function rcp_legacy_f32 void (ptr addrspace(1), float): intrinsic not supported on subtarget
 
-declare float @llvm.amdgcn.rcp.legacy(float) #0
-
 ; GCN-LABEL: {{^}}rcp_legacy_f32:
 ; GCN: v_rcp_legacy_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
 define amdgpu_kernel void @rcp_legacy_f32(ptr addrspace(1) %out, float %src) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
index f4663e9daccc8..0eccde57456c4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
@@ -1,13 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
 
-declare float @llvm.amdgcn.rcp.f32(float) #0
-declare double @llvm.amdgcn.rcp.f64(double) #0
-
-declare double @llvm.amdgcn.sqrt.f64(double) #0
-declare float @llvm.amdgcn.sqrt.f32(float) #0
-declare double @llvm.sqrt.f64(double) #0
-declare float @llvm.sqrt.f32(float) #0
-
 ; FUNC-LABEL: {{^}}rcp_undef_f32:
 ; SI: v_mov_b32_e32 [[NAN:v[0-9]+]], 0x7fc00000
 ; SI-NOT: [[NAN]]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
index 0284f44f5f14d..8cf7c8557892b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope %s
 
-declare i32 @llvm.amdgcn.readfirstlane(i32) #0
-
 ; CHECK-LABEL: {{^}}test_readfirstlane:
 ; CHECK: v_readfirstlane_b32 s{{[0-9]+}}, v2
 define void @test_readfirstlane(ptr addrspace(1) %out, i32 %src) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
index 51465f6bd10ce..36a00f829c850 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope %s
 
-declare i32 @llvm.amdgcn.readlane(i32, i32) #0
-
 ; CHECK-LABEL: {{^}}test_readlane_sreg_sreg:
 ; CHECK-NOT: v_readlane_b32
 define amdgpu_kernel void @test_readlane_sreg_sreg(i32 %src0, i32 %src1) #1 {
@@ -77,8 +75,6 @@ define amdgpu_kernel void @test_readlane_copy_from_sgpr(ptr addrspace(1) %out) #
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
 attributes #0 = { nounwind readnone convergent }
 attributes #1 = { nounwind }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
index f52461b6b3807..725f4710addee 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
@@ -12,9 +12,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11DAGISEL,GFX1132DAGISEL %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11GISEL,GFX1132GISEL %s
 
-declare i32 @llvm.amdgcn.wave.reduce.umax.i32(i32, i32 immarg)
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 define amdgpu_kernel void @uniform_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: uniform_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
index bfdb2da6dc6a4..df7e0d7a4d4ca 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
@@ -12,10 +12,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11DAGISEL,GFX1132DAGISEL %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11GISEL,GFX1132GISEL %s
 
-
-declare i32 @llvm.amdgcn.wave.reduce.umin.i32(i32, i32 immarg)
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 define amdgpu_kernel void @uniform_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: uniform_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
index 1cc592b74b595..6e2dd8938c21e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
@@ -1,9 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=FUNC %s
 
-declare float @llvm.amdgcn.rsq.clamp.f32(float) #1
-declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
-
 ; FUNC-LABEL: {{^}}rsq_clamp_f32:
 ; SI: v_rsq_clamp_f32_e32
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.f16.ll
index 8c003c96dddfe..57c2957dc1f76 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.f16.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-declare half @llvm.amdgcn.rsq.f16(half %a)
-
 ; GCN-LABEL: {{^}}rsq_f16
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
 ; VI:  v_rsq_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll
index 1ceca55440ada..4f702b17f70b8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-declare float @llvm.amdgcn.rsq.legacy(float) #0
-
 ; FUNC-LABEL: {{^}}rsq_legacy_f32:
 ; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
 define amdgpu_kernel void @rsq_legacy_f32(ptr addrspace(1) %out, float %src) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll
index 1b9086efb19a9..e5af4fe237575 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll
@@ -1,9 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-declare float @llvm.amdgcn.rsq.f32(float) #0
-declare double @llvm.amdgcn.rsq.f64(double) #0
-
 ; FUNC-LABEL: {{^}}rsq_f32:
 ; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
 define amdgpu_kernel void @rsq_f32(ptr addrspace(1) %out, float %src) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index a7d3115af29bf..e56605de45d6d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -169,9 +169,6 @@ entry:
   ret void
 }
 
-declare void @llvm.amdgcn.s.barrier() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
 attributes #0 = { nounwind }
 attributes #1 = { convergent nounwind }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll
index 4ab5e97964a85..5af29187cbe82 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll
@@ -1371,19 +1371,6 @@ entry:
   store i32 %tmp4, ptr addrspace(1) %tmp1
   ret void
 }
-declare void @llvm.amdgcn.s.barrier() #1
-declare void @llvm.amdgcn.s.barrier.wait(i16) #1
-declare void @llvm.amdgcn.s.barrier.signal(i32) #1
-declare void @llvm.amdgcn.s.barrier.signal.var(i32) #1
-declare i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32) #1
-declare i1 @llvm.amdgcn.s.barrier.signal.isfirst.var(i32) #1
-declare void @llvm.amdgcn.s.barrier.init(i32, i32) #1
-declare void @llvm.amdgcn.s.barrier.join(i32) #1
-declare i1 @llvm.amdgcn.s.barrier.leave() #1
-declare void @llvm.amdgcn.s.wakeup.barrier(i32) #1
-declare i32 @llvm.amdgcn.s.get.barrier.state(i32) #1
-declare i32 @llvm.amdgcn.s.get.barrier.state.var(i32) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
 
 attributes #0 = { nounwind }
 attributes #1 = { convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
index 074489b9ff505..3806c9abb5e3a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
@@ -1545,12 +1545,6 @@ define amdgpu_ps i32 @s_buffer_load_imm_1028(<4 x i32> inreg %desc) {
   ret i32 %load
 }
 
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1)
-declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
-declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
-declare <3 x i32> @llvm.amdgcn.s.buffer.load.v3i32(<4 x i32>, i32, i32)
-declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)
-
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX6789: {{.*}}
 ; GFX789: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll
index 12388878f05c2..dff217002704e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll
@@ -1,9 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-declare void @llvm.amdgcn.s.dcache.inv() #0
-declare void @llvm.amdgcn.s.waitcnt(i32) #0
-
 ; GCN-LABEL: {{^}}test_s_dcache_inv:
 ; GCN-NEXT: ; %bb.0:
 ; SI-NEXT: s_dcache_inv ; encoding: [0x00,0x00,0xc0,0xc7]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll
index 57f8f6073f1cd..fae4166090bed 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll
@@ -1,9 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=bonaire -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -show-mc-encoding < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-declare void @llvm.amdgcn.s.dcache.inv.vol() #0
-declare void @llvm.amdgcn.s.waitcnt(i32) #0
-
 ; GCN-LABEL: {{^}}test_s_dcache_inv_vol:
 ; GCN-NEXT: ; %bb.0:
 ; CI-NEXT: s_dcache_inv_vol ; encoding: [0x00,0x00,0x40,0xc7]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll
index 4bdb0ee7ee4c8..61b596a30f1f7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll
@@ -1,8 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
 
-declare void @llvm.amdgcn.s.dcache.wb() #0
-declare void @llvm.amdgcn.s.waitcnt(i32) #0
-
 ; VI-LABEL: {{^}}test_s_dcache_wb:
 ; VI-NEXT: ; %bb.0:
 ; VI-NEXT: s_dcache_wb ; encoding: [0x00,0x00,0x84,0xc0,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll
index b137cece5e617..a5e2d0c07da32 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll
@@ -1,8 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
 
-declare void @llvm.amdgcn.s.dcache.wb.vol() #0
-declare void @llvm.amdgcn.s.waitcnt(i32) #0
-
 ; VI-LABEL: {{^}}test_s_dcache_wb_vol:
 ; VI-NEXT: ; %bb.0:
 ; VI-NEXT: s_dcache_wb_vol ; encoding: [0x00,0x00,0x8c,0xc0,0x00,0x00,0x00,0x00]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.decperflevel.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.decperflevel.ll
index 69ed9d553667f..b177f041328c4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.decperflevel.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.decperflevel.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare void @llvm.amdgcn.s.decperflevel(i32) #0
-
 ; GCN-LABEL: {{^}}test_s_decperflevel:
 ; GCN: s_decperflevel 0{{$}}
 ; GCN: s_decperflevel 1{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll
index ffab3449f7cb9..1b8f8da576d78 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 
-declare i32 @llvm.amdgcn.s.get.waveid.in.workgroup() #0
-
 ; GCN-LABEL: {{^}}test_s_get_waveid_in_workgroup:
 ; GFX10: global_store_dword
 ; GFX10: s_get_waveid_in_workgroup [[DEST:s[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll
index 77bea2f4b4d50..7a03c50fc6d2f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i64 @llvm.amdgcn.s.getpc() #0
-
 ; GCN-LABEL: {{^}}test_s_getpc:
 ; GCN: s_load_dwordx2
 ; GCN-DAG: s_getpc_b64 s{{\[[0-9]+:[0-9]+\]}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll
index 52bdfbd62680f..088a2fa96fb47 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll
@@ -26,7 +26,5 @@ define amdgpu_kernel void @readnone_s_getreg_test(ptr addrspace(1) %out) { ; sim
   ret void
 }
 
-declare i32 @llvm.amdgcn.s.getreg(i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.incperflevel.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.incperflevel.ll
index 3e35593f81745..1d777970feae9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.incperflevel.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.incperflevel.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare void @llvm.amdgcn.s.incperflevel(i32) #0
-
 ; GCN-LABEL: {{^}}test_s_incperflevel:
 ; GCN: s_incperflevel 0{{$}}
 ; GCN: s_incperflevel 1{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll
index 82468b6a2076b..6af9e176881d7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll
@@ -3,8 +3,6 @@
 
 ; ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.s.memrealtime
 
-declare i64 @llvm.amdgcn.s.memrealtime() #0
-
 ; GCN-LABEL: {{^}}test_s_memrealtime:
 ; GCN-DAG: s_memrealtime s{{\[[0-9]+:[0-9]+\]}}
 ; GCN-DAG: s_load_dwordx2
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
index 1d7edb2fd9a4e..b218129047f4c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
@@ -3,8 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i64 @llvm.amdgcn.s.memtime() #0
-
 ; GCN-LABEL: {{^}}test_s_memtime:
 ; GCN-DAG: s_memtime s{{\[[0-9]+:[0-9]+\]}}
 ; GCN-DAG: s_load_dwordx2
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.nop.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.nop.ll
index 2eb98336b0711..626ea65be4d26 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.nop.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.nop.ll
@@ -26,5 +26,3 @@ define amdgpu_kernel void @test_s_nop() {
   call void @llvm.amdgcn.s.nop(i16 63)
   ret void
 }
-
-declare void @llvm.amdgcn.s.nop(i16)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sethalt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sethalt.ll
index 374c646cb3648..737a00e505f2d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sethalt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sethalt.ll
@@ -24,5 +24,3 @@ define amdgpu_kernel void @test_s_sethalt() {
   call void @llvm.amdgcn.s.sethalt(i32 7)
   ret void
 }
-
-declare void @llvm.amdgcn.s.sethalt(i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setprio.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setprio.ll
index 087f798b58ff8..fde2ec0eb5e8a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setprio.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setprio.ll
@@ -4,8 +4,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -show-mc-encoding -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
 
-declare void @llvm.amdgcn.s.setprio(i16) #0
-
 define void @test_llvm_amdgcn_s_setprio() #0 {
 ; GFX9-LABEL: test_llvm_amdgcn_s_setprio:
 ; GFX9:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
index 05186ac2aa28f..2a2dcba47b1e0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.setreg.ll
@@ -1506,6 +1506,4 @@ define amdgpu_ps void @test_minus_2047(i32 inreg %var.mode) {
 ;   ret void
 ; }
 
-declare void @llvm.amdgcn.s.setreg(i32 immarg, i32) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.ll
index e3a577ebe49f7..6ae6f133cdada 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare void @llvm.amdgcn.s.sleep(i32) #0
-
 ; GCN-LABEL: {{^}}test_s_sleep:
 ; GCN: s_sleep 0{{$}}
 ; GCN: s_sleep 1{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll
index 4a404af54188d..ab6ad468b347f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=0 < %s | FileCheck -check-prefixes=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=1 < %s | FileCheck -check-prefixes=GCN %s
 
-declare void @llvm.amdgcn.s.sleep.var(i32)
-
 define void @test_s_sleep_var1(i32 %arg) {
 ; GCN-LABEL: test_s_sleep_var1:
 ; GCN:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.ttracedata.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.ttracedata.ll
index 5dec1e15cb3d5..927fbdf62731e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.ttracedata.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.ttracedata.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
 
-declare void @llvm.amdgcn.s.ttracedata(i32)
-declare void @llvm.amdgcn.s.ttracedata.imm(i16)
-
 define amdgpu_cs void @ttracedata_c() {
 ; GFX11-LABEL: ttracedata_c:
 ; GFX11:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll
index 08c77148f6ae1..b3a09df012042 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.event.ll
@@ -13,6 +13,4 @@ entry:
   ret void
 }
 
-declare void @llvm.amdgcn.s.wait.event.export.ready() #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx12.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx12.ll
index ff8f28dae3f8f..1798ed61a210a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx12.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.wait.gfx12.ll
@@ -84,11 +84,3 @@ define amdgpu_ps void @test_storecnt_dscnt() {
   call void @llvm.amdgcn.s.wait.dscnt(i16 0)
   ret void
 }
-
-declare void @llvm.amdgcn.s.wait.bvhcnt(i16)
-declare void @llvm.amdgcn.s.wait.dscnt(i16)
-declare void @llvm.amdgcn.s.wait.expcnt(i16)
-declare void @llvm.amdgcn.s.wait.kmcnt(i16)
-declare void @llvm.amdgcn.s.wait.loadcnt(i16)
-declare void @llvm.amdgcn.s.wait.samplecnt(i16)
-declare void @llvm.amdgcn.s.wait.storecnt(i16)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
index efaf47208b1f1..e338678d7d0a5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
@@ -43,10 +43,5 @@ define amdgpu_ps void @test3(<8 x i32> inreg %rsrc, i32 %c) {
   ret void
 }
 
-declare void @llvm.amdgcn.s.waitcnt(i32) #0
-
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
-declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll
index 87c92137cddee..a0b684f772d77 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i32 @llvm.amdgcn.sad.hi.u8(i32, i32, i32) #0
-
 ; GCN-LABEL: {{^}}v_sad_hi_u8:
 ; GCN: v_sad_hi_u8 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
 define amdgpu_kernel void @v_sad_hi_u8(ptr addrspace(1) %out, i32 %src) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll
index 2c013cc35ad52..2ef5c8929c8b4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i32 @llvm.amdgcn.sad.u16(i32, i32, i32) #0
-
 ; GCN-LABEL: {{^}}v_sad_u16:
 ; GCN: v_sad_u16 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
 define amdgpu_kernel void @v_sad_u16(ptr addrspace(1) %out, i32 %src) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll
index 84b663a508a42..88534cad63228 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i32 @llvm.amdgcn.sad.u8(i32, i32, i32) #0
-
 ; GCN-LABEL: {{^}}v_sad_u8:
 ; GCN: v_sad_u8 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
 define amdgpu_kernel void @v_sad_u8(ptr addrspace(1) %out, i32 %src) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll
index 25b36173cc65b..9026cbdf4d2e2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll
@@ -550,7 +550,5 @@ define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(ptr addrspace(1) %out, pt
   ret void
 }
 
-declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.barrier.ll
index 95e3446bed081..77653f3d3bf5e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.barrier.ll
@@ -17,7 +17,5 @@ entry:
   ret void
 }
 
-declare void @llvm.amdgcn.sched.barrier(i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx11.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx11.ll
index c2e74eb05d164..36d0e0915eb89 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx11.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.gfx11.ll
@@ -394,10 +394,6 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-declare void @llvm.amdgcn.sched.group.barrier(i32, i32, i32) #1
-declare <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half>, <16 x half> , <16 x half>, i1 immarg) #1
-
 attributes #0 = { nounwind "amdgpu-flat-work-group-size"="1,32" }
 attributes #1 = { nounwind }
 attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll
index 10f09b6390aba..d4bc4c5b4d75c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll
@@ -1613,11 +1613,6 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-declare void @llvm.amdgcn.sched.group.barrier(i32, i32, i32) #1
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32) #1
-declare float @llvm.exp.f32(float) #2
-
 attributes #0 = { nounwind "amdgpu-flat-work-group-size"="1,256" }
 attributes #1 = { nounwind }
 attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll
index ac547295c77d5..339c5cd175691 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll
@@ -3,8 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 
-declare i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 %clamp)
-
 ; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot2_clamp
 ; GFX906: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 ; GFX908: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
index fb44d11bc30c0..f7d704932392d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll
@@ -5,8 +5,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
 
-declare i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c, i1 %clamp)
-
 ; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot4_clamp
 ; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 ; GFX10:  v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
index 1c45a784b7296..87f07bc0af214 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll
@@ -6,8 +6,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
 
-declare i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 %clamp)
-
 ; GCN-LABEL: {{^}}test_llvm_amdgcn_sdot8_clamp
 ; GFX906:  v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 ; GFX908:  v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
index fdd457ca992ea..b770f45ec492a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.ll
@@ -161,7 +161,4 @@ end:
   ret void
 }
 
-declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
-declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll
index eb30484ea7f19..dc4a68d48b3ef 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll
@@ -177,6 +177,3 @@ define amdgpu_kernel void @test_get_99999_i64(ptr addrspace(1) %out) {
   store i64 %ret, ptr addrspace(1) %out
   ret void
 }
-
-declare i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32)
-declare i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
index b3acd4949301e..bc7713153ce50 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.chain.arg.ll
@@ -781,11 +781,6 @@ define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %
   store i32 %wwm, ptr addrspace(1) %out
   ret void
 }
-
-declare i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32, i32) #0
-declare i64 @llvm.amdgcn.set.inactive.chain.arg.i64(i64, i64) #0
-declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg)
-declare i32 @llvm.amdgcn.strict.wwm.i32(i32)
 declare amdgpu_gfx void @gfx_callee(<12 x i32>)
 
 attributes #0 = { convergent readnone willreturn nocallback nofree}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
index 8302af7450ed9..ee95538414e3d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
@@ -124,8 +124,4 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
   ret void
 }
 
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
-declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64) #0
-declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)
-
 attributes #0 = { convergent readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll
index 937b8bf94eeae..506a357f2b3c0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll
@@ -1,8 +1,6 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i32 @llvm.amdgcn.sffbh.i32(i32) #1
-
 ; GCN-LABEL: {{^}}s_flbit:
 ; GCN: s_load_dword [[VAL:s[0-9]+]],
 ; GCN: s_flbit_i32 [[SRESULT:s[0-9]+]], [[VAL]]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.f16.ll
index 0fe064060d989..5ac84308bc7d7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.f16.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 
-declare half @llvm.amdgcn.sin.f16(half %a)
-
 ; GCN-LABEL: {{^}}sin_f16
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
 ; VI:  v_sin_f16_e32 v[[R_F16:[0-9]+]], v[[A_F16]]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll
index 2b61ccad1d652..76e05952f1bfe 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare float @llvm.amdgcn.sin.f32(float) #0
-
 ; GCN-LABEL: {{^}}v_sin_f32:
 ; GCN: v_sin_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
 define amdgpu_kernel void @v_sin_f32(ptr addrspace(1) %out, float %src) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
index 5fb50d7e8589a..057eb2f053677 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.softwqm.ll
@@ -279,18 +279,6 @@ END:
   ret float %r
 }
 
-declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #2
-declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32 immarg) #3
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
-declare float @llvm.amdgcn.wqm.f32(float) #3
-declare float @llvm.amdgcn.softwqm.f32(float) #3
-declare i32 @llvm.amdgcn.softwqm.i32(i32) #3
-declare float @llvm.amdgcn.strict.wwm.f32(float) #3
-declare float @llvm.amdgcn.wwm.f32(float) #3
-declare void @llvm.amdgcn.wqm.demote(i1) #1
-
 attributes #1 = { nounwind }
 attributes #2 = { nounwind readonly }
 attributes #3 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.f16.ll
index bbfb88a4b22a3..58eb26d295b5b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.f16.ll
@@ -35,7 +35,4 @@ define half @v_fneg_fabs_sqrt_f16(half %src)  {
   ret half %sqrt
 }
 
-declare half @llvm.amdgcn.sqrt.f16(half) #0
-declare half @llvm.fabs.f16(half) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.ll
index 0257a3d111421..82568dbb3aae1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sqrt.ll
@@ -70,9 +70,4 @@ define double @v_fneg_fabs_sqrt_f64(double %src)  {
   ret double %sqrt
 }
 
-declare float @llvm.amdgcn.sqrt.f32(float) #0
-declare double @llvm.amdgcn.sqrt.f64(double) #0
-declare float @llvm.fabs.f32(float) #0
-declare double @llvm.fabs.f64(double) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll
index 0bdb21f767191..a5abab2e3ef7c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.fadd.ll
@@ -55,7 +55,4 @@ define amdgpu_ps void @struct_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc
   ret void
 }
 
-declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll
index a7a2356eda3b9..71e991064d695 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.atomic.ll
@@ -147,20 +147,4 @@ main_body:
   ret float %out
 }
 
-declare i32 @llvm.amdgcn.struct.buffer.atomic.swap.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare float @llvm.amdgcn.struct.buffer.atomic.swap.f32(float, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.sub.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.smin.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.umin.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.smax.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.umax.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.and.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.or.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.xor.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.inc.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.dec.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32, i32) #0
-declare i64 @llvm.amdgcn.struct.buffer.atomic.cmpswap.i64(i64, i64, <4 x i32>, i32, i32, i32, i32) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.d16.ll
index 9bf64ba1aa2ff..12969e854ccb2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.d16.ll
@@ -57,9 +57,3 @@ main_body:
   %fdata = bitcast i16 %data to half
   ret half %fdata
 }
-
-declare half @llvm.amdgcn.struct.buffer.load.format.f16(<4 x i32>, i32, i32, i32, i32)
-declare <2 x half> @llvm.amdgcn.struct.buffer.load.format.v2f16(<4 x i32>, i32, i32, i32, i32)
-declare <3 x half> @llvm.amdgcn.struct.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32, i32)
-declare <4 x half> @llvm.amdgcn.struct.buffer.load.format.v4f16(<4 x i32>, i32, i32, i32, i32)
-declare i16 @llvm.amdgcn.struct.buffer.load.format.i16(<4 x i32>, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll
index 00be32b06de05..0e14aa1c72a35 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll
@@ -1041,17 +1041,4 @@ define amdgpu_cs float @buffer_load_f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace
   %fstatus = bitcast i32 %status to float
   ret float %fstatus
 }
-
-declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32>, i32, i32, i32, i32) #0
-declare { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { <3 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { <2 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { <2 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
-declare { float, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
index 4c1ae4c228adb..adf07a8dea3d2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll
@@ -157,8 +157,6 @@ bb:
   unreachable
 }
 ; Function Attrs: nounwind readnone willreturn
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
 ; Function Attrs: nounwind readonly willreturn
-declare <3 x half> @llvm.amdgcn.struct.buffer.load.format.v3f16(<4 x i32>, i32, i32, i32, i32 immarg) #1
 attributes #0 = { nounwind readnone willreturn }
 attributes #1 = { nounwind readonly willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.lds.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.lds.ll
index 21dc07cf28fd6..7bff72cffc70d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.lds.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,SDAG
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GISEL
 
-declare void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %vindex, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)
-
 define amdgpu_ps float @buffer_load_lds_dword(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) {
 ; SDAG-LABEL: buffer_load_lds_dword:
 ; SDAG:       ; %bb.0: ; %main_body
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
index 5e03748bee08f..dc6921cdaaa99 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
@@ -272,21 +272,4 @@ main_body:
   ret void
 }
 
-declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32>, i32, i32, i32, i32) #0
-declare <2 x i32> @llvm.amdgcn.struct.buffer.load.v2i32(<4 x i32>, i32, i32, i32, i32) #0
-declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare i8 @llvm.amdgcn.struct.buffer.load.i8(<4 x i32>, i32, i32, i32, i32) #0
-
-declare half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32>, i32, i32, i32, i32) #0
-declare <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32>, i32, i32, i32, i32) #0
-declare <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32>, i32, i32, i32, i32) #0
-
-declare i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32>, i32, i32, i32, i32) #0
-declare <2 x i16> @llvm.amdgcn.struct.buffer.load.v2i16(<4 x i32>, i32, i32, i32, i32) #0
-declare <4 x i16> @llvm.amdgcn.struct.buffer.load.v4i16(<4 x i32>, i32, i32, i32, i32) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
index 73ac61a77a94e..a47ca57ab0829 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.d16.ll
@@ -84,9 +84,3 @@ main_body:
   call void @llvm.amdgcn.struct.buffer.store.format.i16(i16 %data, <4 x i32> %rsrc, i32 %index, i32 0, i32 0, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.struct.buffer.store.format.f16(half, <4 x i32>, i32, i32, i32, i32)
-declare void @llvm.amdgcn.struct.buffer.store.format.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32)
-declare void @llvm.amdgcn.struct.buffer.store.format.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32)
-declare void @llvm.amdgcn.struct.buffer.store.format.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32)
-declare void @llvm.amdgcn.struct.buffer.store.format.i16(i16, <4 x i32>, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.ll
index 192b01ac085d7..ad33239331666 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.format.ll
@@ -104,11 +104,5 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.struct.buffer.store.format.f32(float, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.format.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.format.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.format.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.ll
index 94c9f7ab8e756..7748852ab0f15 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.ll
@@ -385,20 +385,5 @@ define amdgpu_ps void @struct_buffer_store_v4i16(<4 x i32> inreg %rsrc, <4 x i16
   ret void
 }
 
-declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1
-declare void @llvm.amdgcn.struct.buffer.store.i8(i8, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.i16(i16, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v2i16(<2 x i16>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v4i16(<4 x i16>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.f16(half, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.buffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32) #0
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
index 370859fd07d52..fd0215550f4a4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.ll
@@ -55,7 +55,4 @@ define amdgpu_ps void @struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_
   ret void
 }
 
-declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll
index fb44515595a55..7c42650b2f5ba 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.ll
@@ -147,20 +147,4 @@ main_body:
   ret float %out
 }
 
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare float @llvm.amdgcn.struct.ptr.buffer.atomic.swap.f32(float, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.sub.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.smin.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.umin.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.smax.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.umax.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.and.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.or.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.xor.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.inc.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.dec.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare i64 @llvm.amdgcn.struct.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32, i32) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.d16.ll
index 3a396b54f89ab..97781c43b14a2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.d16.ll
@@ -57,9 +57,3 @@ main_body:
   %fdata = bitcast i16 %data to half
   ret half %fdata
 }
-
-declare half @llvm.amdgcn.struct.ptr.buffer.load.format.f16(ptr addrspace(8), i32, i32, i32, i32)
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f16(ptr addrspace(8), i32, i32, i32, i32)
-declare <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32, i32)
-declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f16(ptr addrspace(8), i32, i32, i32, i32)
-declare i16 @llvm.amdgcn.struct.ptr.buffer.load.format.i16(ptr addrspace(8), i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll
index b0bd4e428ef2d..ce4d6ee7f2c3c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.ll
@@ -834,17 +834,4 @@ define amdgpu_cs float @buffer_load_f32_tfe(ptr addrspace(8) inreg %rsrc, ptr ad
   %fstatus = bitcast i32 %status to float
   ret float %fstatus
 }
-
-declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
-declare { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll
index 46b2516f72f8e..88628af05d1c5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll
@@ -129,8 +129,6 @@ bb:
   unreachable
 }
 ; Function Attrs: nounwind readnone willreturn
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
 ; Function Attrs: nounwind readonly willreturn
-declare <3 x half> @llvm.amdgcn.struct.ptr.buffer.load.format.v3f16(ptr addrspace(8), i32, i32, i32, i32 immarg) #1
 attributes #0 = { nounwind readnone willreturn }
 attributes #1 = { nounwind readonly willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.lds.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.lds.ll
index 04a9f926acd5b..fe15c18d6f02a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.lds.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,SDAG
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GISEL
 
-declare void @llvm.amdgcn.struct.ptr.buffer.load.lds(ptr addrspace(8) %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %vindex, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)
-
 define amdgpu_ps float @buffer_load_lds_dword(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) inreg %lds) {
 ; SDAG-LABEL: buffer_load_lds_dword:
 ; SDAG:       ; %bb.0: ; %main_body
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.ll
index 2f9e6b0a1cf52..93169cae18b23 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.ll
@@ -272,21 +272,4 @@ main_body:
   ret void
 }
 
-declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare i32 @llvm.amdgcn.struct.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <2 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v2i32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <4 x i32> @llvm.amdgcn.struct.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare i8 @llvm.amdgcn.struct.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32, i32) #0
-
-declare half @llvm.amdgcn.struct.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <4 x half> @llvm.amdgcn.struct.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32) #0
-
-declare i16 @llvm.amdgcn.struct.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <2 x i16> @llvm.amdgcn.struct.ptr.buffer.load.v2i16(ptr addrspace(8), i32, i32, i32, i32) #0
-declare <4 x i16> @llvm.amdgcn.struct.ptr.buffer.load.v4i16(ptr addrspace(8), i32, i32, i32, i32) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll
index 8109fca4a043a..1d87045c47c41 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.d16.ll
@@ -84,9 +84,3 @@ main_body:
   call void @llvm.amdgcn.struct.ptr.buffer.store.format.i16(i16 %data, ptr addrspace(8) %rsrc, i32 %index, i32 0, i32 0, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.f16(half, ptr addrspace(8), i32, i32, i32, i32)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v3f16(<3 x half>, ptr addrspace(8), i32, i32, i32, i32)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32, i32)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.i16(i16, ptr addrspace(8), i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.ll
index 13217b24dcd4b..79d651a7fc104 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.format.ll
@@ -104,11 +104,5 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.f32(float, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.format.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.ll
index e52af31360764..8a9e1ed969030 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.store.ll
@@ -177,21 +177,6 @@ define amdgpu_ps void @struct_ptr_buffer_store_v4i16(ptr addrspace(8) inreg %rsr
   ret void
 }
 
-declare void @llvm.amdgcn.struct.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1
-declare void @llvm.amdgcn.struct.ptr.buffer.store.i8(i8, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.i16(i16, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v2i16(<2 x i16>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v4i16(<4 x i16>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.f16(half, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32, i32) #0
-
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.load.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.load.d16.ll
index 7f21fe8d238b8..e1c8bf112fa3a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.load.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.load.d16.ll
@@ -151,8 +151,3 @@ main_body:
   %elt = extractelement <4 x half> %data, i32 3
   ret half %elt
 }
-
-declare half @llvm.amdgcn.struct.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32, i32, i32)
-declare <2 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32, i32)
-declare <3 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32, i32)
-declare <4 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.load.ll
index 84f818a168a4f..c1cae91217939 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.load.ll
@@ -439,8 +439,3 @@ main_body:
   %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 16777212, i32 0, i32 63, i32 0)
   ret <4 x float> %data
 }
-
-declare i32 @llvm.amdgcn.struct.ptr.tbuffer.load.i32(ptr addrspace(8), i32, i32, i32, i32, i32)
-declare <2 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v2i32(ptr addrspace(8), i32, i32, i32, i32, i32)
-declare <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll
index 439742d6b315d..0a112a5dce453 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.d16.ll
@@ -233,8 +233,3 @@ main_body:
   call void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f16(<4 x half> %data, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.f16(half, ptr addrspace(8), i32, i32, i32, i32, i32)
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32, i32)
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v3f16(<3 x half>, ptr addrspace(8), i32, i32, i32, i32, i32)
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.ll
index f84422efcf991..233deb8c86945 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.tbuffer.store.ll
@@ -501,12 +501,6 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.i32(i32, ptr addrspace(8), i32, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll
index f74b6fe7893ea..6e395aa345f0c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.d16.ll
@@ -183,8 +183,3 @@ main_body:
   %elt = extractelement <4 x half> %data, i32 3
   ret half %elt
 }
-
-declare half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32, i32)
-declare <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32, i32)
-declare <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32, i32)
-declare <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll
index 0c0f50bd49557..a5344e5d02d5a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll
@@ -552,8 +552,3 @@ main_body:
   %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 16777212, i32 0, i32 63, i32 0)
   ret <4 x float> %data
 }
-
-declare i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32>, i32, i32, i32, i32, i32)
-declare <2 x i32> @llvm.amdgcn.struct.tbuffer.load.v2i32(<4 x i32>, i32, i32, i32, i32, i32)
-declare <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll
index 22ec22dc2db02..3e2f96380e9e7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.d16.ll
@@ -306,8 +306,3 @@ main_body:
   call void @llvm.amdgcn.struct.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 33, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.struct.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32, i32, i32)
-declare void @llvm.amdgcn.struct.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32, i32)
-declare void @llvm.amdgcn.struct.tbuffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32, i32)
-declare void @llvm.amdgcn.struct.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll
index 98ed437581e1a..d76ce3bd41b1f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.store.ll
@@ -616,12 +616,6 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.struct.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32, i32) #0
-declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot4.ll
index 501342845c4f5..b324c18d2b88c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot4.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
 
-declare i32 @llvm.amdgcn.sudot4(i1 %asign, i32 %a, i1 %bsign, i32 %b, i32 %c, i1 %clamp)
-
 define i32 @test_llvm_amdgcn_sudot4_uu(i32 %a, i32 %b, i32 %c) {
 ; GFX11-LABEL: test_llvm_amdgcn_sudot4_uu:
 ; GFX11:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll
index 4355cc84aa721..96a86f749ce22 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sudot8.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GFX11
 
-declare i32 @llvm.amdgcn.sudot8(i1 %asign, i32 %a, i1 %bsign, i32 %b, i32 %c, i1 %clamp)
-
 define i32 @test_llvm_amdgcn_sudot8_uu(i32 %a, i32 %b, i32 %c) {
 ; GFX11-LABEL: test_llvm_amdgcn_sudot8_uu:
 ; GFX11:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll
index 06d66ce9db37d..edbd241125b5a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.d16.ll
@@ -48,8 +48,3 @@ main_body:
   %elt = extractelement <4 x half> %data, i32 3
   ret half %elt
 }
-
-declare half @llvm.amdgcn.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
-declare <2 x half> @llvm.amdgcn.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
-declare <3 x half> @llvm.amdgcn.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
-declare <4 x half> @llvm.amdgcn.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.dwordx3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.dwordx3.ll
index de929c2893baf..aa1d29e78df46 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.dwordx3.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.dwordx3.ll
@@ -34,7 +34,3 @@ main_body:
     ret <3 x float> %vdata.f
 }
 
-declare <3 x i32> @llvm.amdgcn.raw.tbuffer.load.v3i32(<4 x i32>, i32, i32, i32, i32)
-declare <3 x i32> @llvm.amdgcn.struct.tbuffer.load.v3i32(<4 x i32>, i32, i32, i32, i32, i32)
-declare <3 x i32> @llvm.amdgcn.tbuffer.load.v3i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
-
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll
index eb0dc37849b0f..94b5ab71fa212 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll
@@ -102,8 +102,3 @@ define amdgpu_vs float @buffer_load_x(<4 x i32> inreg %rsrc) {
     %vdata.f = bitcast i32 %vdata to float
     ret float %vdata.f
 }
-
-declare i32 @llvm.amdgcn.tbuffer.load.i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
-declare <2 x i32> @llvm.amdgcn.tbuffer.load.v2i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
-declare <4 x i32> @llvm.amdgcn.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
-declare <4 x float> @llvm.amdgcn.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll
index 16d7de698aa03..255f34cca168d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.d16.ll
@@ -69,8 +69,3 @@ main_body:
   call void @llvm.amdgcn.tbuffer.store.v4f16(<4 x half> %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0, i32 1, i32 2, i1 0, i1 0)
   ret void
 }
-
-declare void @llvm.amdgcn.tbuffer.store.f16(half, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
-declare void @llvm.amdgcn.tbuffer.store.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
-declare void @llvm.amdgcn.tbuffer.store.v3f16(<3 x half>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
-declare void @llvm.amdgcn.tbuffer.store.v4f16(<4 x half>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.dwordx3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.dwordx3.ll
index 83b5b0c65c47e..14e99e80059ed 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.dwordx3.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.dwordx3.ll
@@ -29,7 +29,3 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.raw.tbuffer.store.v3i32(<3 x i32>, <4 x i32>, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.struct.tbuffer.store.v3i32(<3 x i32>, <4 x i32>, i32, i32, i32, i32, i32) #0
-declare void @llvm.amdgcn.tbuffer.store.v3i32(<3 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
-
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll
index b3a135a35a652..9b0f6992b0060 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.store.ll
@@ -99,12 +99,6 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.tbuffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.tbuffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
-declare void @llvm.amdgcn.tbuffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll
index 66708f683abed..9478558ad2a64 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll
@@ -1,8 +1,6 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 
-declare double @llvm.amdgcn.trig.preop.f64(double, i32) nounwind readnone
-
 ; SI-LABEL: {{^}}test_trig_preop_f64:
 ; SI-DAG: buffer_load_dword [[SEG:v[0-9]+]]
 ; SI-DAG: buffer_load_dwordx2 [[SRC:v\[[0-9]+:[0-9]+\]]],
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
index 0755dcddd8f46..f7c2dca5caac1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
@@ -1732,7 +1732,5 @@ define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) #0 {
   ret void
 }
 
-declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
index 2c1472313ca7a..ef33c54e9c2cc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
@@ -4,9 +4,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1011 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 
-declare i32 @llvm.amdgcn.udot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 %clamp)
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 ; GCN-LABEL: {{^}}test_llvm_amdgcn_udot2_clamp:
 ; GFX9:   v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 ; GFX10:  v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
index 7895c60f3467e..fa2c15e58e3cb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
@@ -4,8 +4,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 
-declare i32 @llvm.amdgcn.udot4(i32 %a, i32 %b, i32 %c, i1 %clamp)
-
 ; GCN-LABEL: {{^}}test_llvm_amdgcn_udot4_clamp
 ; GFX9:   v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 ; GFX10:  v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
index c2df2d9882f3b..dbd5445dc0fe8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
@@ -4,8 +4,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1012 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GFX10
 
-declare i32 @llvm.amdgcn.udot8(i32 %a, i32 %b, i32 %c, i1 %clamp)
-
 ; GCN-LABEL: {{^}}test_llvm_amdgcn_udot8_clamp
 ; GFX9:   v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 ; GFX10:  v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.unreachable.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.unreachable.ll
index 6e2bb00925f71..c2650a15a9498 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.unreachable.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.unreachable.ll
@@ -5,5 +5,3 @@ define amdgpu_kernel void @f() {
   call void @llvm.amdgcn.unreachable()
   ret void
 }
-
-declare void @llvm.amdgcn.unreachable()
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
index e43daf46e1e06..cf6149923347c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
@@ -455,12 +455,4 @@ define amdgpu_kernel void @dpp_test_v2f16_imm_comb8(ptr addrspace(1) %out, <2 x
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare void @llvm.amdgcn.s.barrier()
-declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0
-declare <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16>, <2 x i16>, i32, i32, i32, i1) #0
-declare <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half>, <2 x half>, i32, i32, i32, i1) #0
-declare float @llvm.amdgcn.update.dpp.f32(float, float, i32, i32, i32, i1) #0
-declare i64 @llvm.amdgcn.update.dpp.i64(i64, i64, i32, i32, i32, i1) #0
-
 attributes #0 = { nounwind readnone convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.barrier.ll
index 4b4bdfe118d0d..d87452176d6fb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.barrier.ll
@@ -22,7 +22,5 @@ entry:
   unreachable
 }
 
-declare void @llvm.amdgcn.wave.barrier() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll
index df5533b629502..42779c6c9f4f1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wave.id.ll
@@ -57,5 +57,3 @@ define amdgpu_gfx void @test_wave_id_callable(ptr addrspace(1) %out) {
   store i32 %waveid, ptr addrspace(1) %out
   ret void
 }
-
-declare i32 @llvm.amdgcn.wave.id()
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
index 270ab5fee1125..b2eab4e38f08c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
@@ -79,6 +79,4 @@ bb3:                                              ; preds = %bb2, %bb
   ret void
 }
 
-declare i32 @llvm.amdgcn.wavefrontsize() #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_32.ll
index a538364bf2ec4..3cccfbf44b9b7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_32.ll
@@ -1,15 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=W32
 
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<16 x half>, <16 x half> , <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16(<16 x i16>, <16 x i16> , <8 x float>)
-declare <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half>, <16 x half> , <16 x half>, i1 immarg)
-declare <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied(<16 x half>, <16 x half> , <16 x half>, i1 immarg)
-declare <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16>, <16 x i16> , <16 x i16>, i1 immarg)
-declare <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied(<16 x i16>, <16 x i16> , <16 x i16>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 immarg, <4 x i32>, i1 immarg, <4 x i32> , <8 x i32>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 immarg, <2 x i32>, i1 immarg, <2 x i32> , <8 x i32>, i1 immarg)
-
 ; @llvm.amdgcn.wmma.f32.16x16x16.f16
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_64.ll
index 368d637f8ac1b..f40febc0a5a21 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_64.ll
@@ -1,15 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=W64
 
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half>, <16 x half>, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v16i16(<16 x i16>, <16 x i16>, <4 x float>)
-declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.b8f16.v16f16(<16 x half>, <16 x half>, <8 x half>, i1 immarg)
-declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.tied.v8f16.v16f16(<16 x half>, <16 x half>, <8 x half>, i1 immarg)
-declare <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v16i16(<16 x i16>, <16 x i16>, <8 x i16>, i1 immarg)
-declare <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.tied.v8i16.v16i16(<16 x i16>, <16 x i16>, <8 x i16>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.v4i32(i1 immarg, <4 x i32>, i1 immarg, <4 x i32>, <4 x i32>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.v2i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <4 x i32>, i1 immarg)
-
 ; @llvm.amdgcn.wmma.f32.16x16x16.f16
 
 define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <4 x float> %C, ptr addrspace(1) %out) {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll
index ab29ca4a99734..bcb69fb783f84 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll
@@ -3,10 +3,6 @@
 ; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s
 ; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s
 
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.y() #0
-declare i32 @llvm.amdgcn.workgroup.id.z() #0
-
 ; ALL-LABEL: {{^}}test_workgroup_id_x:
 
 ; MESA3D: .amd_kernel_code_t
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
index a1835ea176d5b..2aa80eaf9bc86 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
@@ -5,10 +5,6 @@
 ; RUN: llc -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
 ; RUN: llc -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
-
 ; MESA: .section .AMDGPU.config
 ; MESA: .long 47180
 ; MESA-NEXT: .long 132{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
index 2e47cc505ee69..83fa71ec44da6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.demote.ll
@@ -1217,17 +1217,6 @@ define amdgpu_ps void @static_exact_nop(float %arg0, float %arg1) {
   ret void
 }
 
-
-declare void @llvm.amdgcn.wqm.demote(i1) #0
-declare i1 @llvm.amdgcn.live.mask() #0
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare float @llvm.amdgcn.wqm.f32(float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #2
-declare float @llvm.amdgcn.interp.p2(float, float, i32 immarg, i32 immarg, i32) #2
-declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) #3
-declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg) #4
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll
index d51efc2b275db..6d45f64630b4a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -global-isel=1 -verify-machineinstrs < %s | FileCheck  -check-prefixes=GFX11 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -global-isel=0 -verify-machineinstrs < %s | FileCheck  -check-prefixes=GFX11 %s
 
-declare i32 @llvm.amdgcn.s.wqm.i32(i32)
-declare i64 @llvm.amdgcn.s.wqm.i64(i64)
-
 define i32 @test_s_wqm_constant_i32() {
 ; GFX11-LABEL: test_s_wqm_constant_i32:
 ; GFX11:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
index a4da690b2af1f..93bab64d0eefe 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wqm.vote.ll
@@ -61,7 +61,4 @@ main_body:
   ret float 0.0
 }
 
-declare void @llvm.amdgcn.kill(i1) #1
-declare i1 @llvm.amdgcn.wqm.vote(i1)
-
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
index 37951669dbe75..c3c5deb1ea92a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll
@@ -3,8 +3,6 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
 
-declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #0
-
 ; CHECK-LABEL: {{^}}test_writelane_sreg:
 ; CIGFX9: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, m0
 ; GFX10: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
@@ -80,8 +78,6 @@ define amdgpu_kernel void @test_writelane_imm_oldval(ptr addrspace(1) %out, i32
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
 attributes #0 = { nounwind readnone convergent }
 attributes #1 = { nounwind }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
index eeddb3d5b8192..ed17b5a547519 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.ceil.f16.ll
@@ -4,9 +4,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s
 
-declare half @llvm.ceil.f16(half %a)
-declare <2 x half> @llvm.ceil.v2f16(<2 x half> %a)
-
 define amdgpu_kernel void @ceil_f16(
 ; SI-LABEL: ceil_f16:
 ; SI:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll
index fcc4cb3436fd7..f507f823e89f8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.cos.f16.ll
@@ -197,6 +197,3 @@ define amdgpu_kernel void @cos_v2f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
   store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
-
-declare half @llvm.cos.f16(half %a)
-declare <2 x half> @llvm.cos.v2f16(<2 x half> %a)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.cos.ll b/llvm/test/CodeGen/AMDGPU/llvm.cos.ll
index 2b585e1bfc7b5..c5c79bb6ceaba 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.cos.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.cos.ll
@@ -34,6 +34,3 @@ define amdgpu_kernel void @testv(ptr addrspace(1) %out, <4 x float> inreg %vx) #
    store <4 x float> %cos, ptr addrspace(1) %out
    ret void
 }
-
-declare float @llvm.cos.f32(float) readnone
-declare <4 x float> @llvm.cos.v4f32(<4 x float>) readnone
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
index 6c8fccd54b81b..ce4a506224c04 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.dbg.value.ll
@@ -33,8 +33,6 @@ bb:
   ret void, !dbg !14
 }
 
-declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-
 attributes #0 = { nounwind  }
 attributes #1 = { nounwind readnone }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
index 7a0450761e1f1..fba196865afa8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp.ll
@@ -7451,17 +7451,6 @@ define float @v_exp_f32_contract_nnan_ninf(float %in) {
   ret float %result
 }
 
-declare float @llvm.fabs.f32(float) #2
-declare float @llvm.exp.f32(float) #2
-declare <2 x float> @llvm.exp.v2f32(<2 x float>) #2
-declare <3 x float> @llvm.exp.v3f32(<3 x float>) #2
-declare <4 x float> @llvm.exp.v4f32(<4 x float>) #2
-declare half @llvm.fabs.f16(half) #2
-declare half @llvm.exp.f16(half) #2
-declare <2 x half> @llvm.exp.v2f16(<2 x half>) #2
-declare <3 x half> @llvm.exp.v3f16(<3 x half>) #2
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
-
 attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
 attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
 attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
index 544c1de6c7bb7..df72767432b50 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp10.ll
@@ -7545,17 +7545,6 @@ define float @v_exp10_f32_contract_nnan_ninf(float %in) {
   ret float %result
 }
 
-declare float @llvm.fabs.f32(float) #2
-declare float @llvm.exp10.f32(float) #2
-declare <2 x float> @llvm.exp10.v2f32(<2 x float>) #2
-declare <3 x float> @llvm.exp10.v3f32(<3 x float>) #2
-declare <4 x float> @llvm.exp10.v4f32(<4 x float>) #2
-declare half @llvm.fabs.f16(half) #2
-declare half @llvm.exp10.f16(half) #2
-declare <2 x half> @llvm.exp10.v2f16(<2 x half>) #2
-declare <3 x half> @llvm.exp10.v3f16(<3 x half>) #2
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
-
 attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
 attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
 attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
index 36e78975cdb01..65794b41584b0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.exp2.ll
@@ -2952,17 +2952,6 @@ define float @v_exp2_f32_contract_nnan_ninf(float %in) {
   ret float %result
 }
 
-declare float @llvm.fabs.f32(float) #2
-declare float @llvm.exp2.f32(float) #2
-declare <2 x float> @llvm.exp2.v2f32(<2 x float>) #2
-declare <3 x float> @llvm.exp2.v3f32(<3 x float>) #2
-declare <4 x float> @llvm.exp2.v4f32(<4 x float>) #2
-declare half @llvm.fabs.f16(half) #2
-declare half @llvm.exp2.f16(half) #2
-declare <2 x half> @llvm.exp2.v2f16(<2 x half>) #2
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
-declare <3 x half> @llvm.exp2.v3f16(<3 x half>) #2
-
 attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
 attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
 attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
index e8d037c5ff53e..daf03fe518b89 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.floor.f16.ll
@@ -4,9 +4,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-FAKE16 %s
 
-declare half @llvm.floor.f16(half %a)
-declare <2 x half> @llvm.floor.v2f16(<2 x half> %a)
-
 define amdgpu_kernel void @floor_f16(
 ; SI-LABEL: floor_f16:
 ; SI:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll
index 87d16385f7b26..8bacee7607f37 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fma.f16.ll
@@ -2,10 +2,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI,VIGFX9 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,VIGFX9 %s
 
-declare half @llvm.fma.f16(half %a, half %b, half %c)
-declare <2 x half> @llvm.fma.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
-declare <4 x half> @llvm.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c)
-
 ; GCN-LABEL: {{^}}fma_f16
 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
 ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
index a2e30603b6afc..b549219e28ae1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
@@ -8,9 +8,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-FLUSH %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -denormal-fp-math=ieee -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11-DENORM %s
 
-declare half @llvm.fmuladd.f16(half %a, half %b, half %c)
-declare <2 x half> @llvm.fmuladd.v2f16(<2 x half> %a, <2 x half> %b, <2 x half> %c)
-
 define amdgpu_kernel void @fmuladd_f16(
 ; SI-LABEL: fmuladd_f16:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
index a113c07b3e2be..3dd755539b9f4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fptrunc.round.ll
@@ -48,5 +48,3 @@ define amdgpu_gs void @test_fptrunc_round_upward_multiple_calls(float %a, float
   store half %res5, ptr addrspace(1) %out, align 4
   ret void
 }
-
-declare half @llvm.fptrunc.round(float, metadata)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll
index 14742c5827c1e..b2359fd324c06 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll
@@ -1222,18 +1222,6 @@ define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) {
   ret <2 x i32> %result.1
 }
 
-declare { float, i32 } @llvm.frexp.f32.i32(float) #0
-declare { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float>) #0
-
-declare { half, i32 } @llvm.frexp.f16.i32(half) #0
-declare { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half>) #0
-
-declare { double, i32 } @llvm.frexp.f64.i32(double) #0
-declare { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double>) #0
-
-declare { half, i16 } @llvm.frexp.f16.i16(half) #0
-declare { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half>) #0
-
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll b/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll
index aca7d3c720ceb..01e7f9cdf2bf8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.get.fpmode.ll
@@ -6,8 +6,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
 
-declare i32 @llvm.get.fpmode.i32()
-
 define i32 @func_fpmode_i32() {
 ; GFX678-LABEL: func_fpmode_i32:
 ; GFX678:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll b/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll
index 48f6beb60f01e..cc1d91d08a392 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll
@@ -6,8 +6,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s
 
-declare i32 @llvm.get.rounding()
-
 define i32 @func_rounding() {
 ; GFX678-LABEL: func_rounding:
 ; GFX678:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
index ea823f30f26c2..090aacaa9295a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.bf16.ll
@@ -2898,11 +2898,6 @@ entry:
   ret i1 %0
 }
 
-declare i1 @llvm.is.fpclass.bf16(bfloat, i32)
-declare <2 x i1> @llvm.is.fpclass.v2bf16(<2 x bfloat>, i32)
-declare <3 x i1> @llvm.is.fpclass.v3bf16(<3 x bfloat>, i32)
-declare <4 x i1> @llvm.is.fpclass.v4bf16(<4 x bfloat>, i32)
-
 ; Assume DAZ
 attributes #0 = { "denormal-fp-math"="ieee,preserve-sign" }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
index ec3c08ec79523..163b84dce6ba4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.f16.ll
@@ -3207,11 +3207,6 @@ entry:
   ret i1 %0
 }
 
-declare i1 @llvm.is.fpclass.f16(half, i32)
-declare <2 x i1> @llvm.is.fpclass.v2f16(<2 x half>, i32)
-declare <3 x i1> @llvm.is.fpclass.v3f16(<3 x half>, i32)
-declare <4 x i1> @llvm.is.fpclass.v4f16(<4 x half>, i32)
-
 ; Assume DAZ
 attributes #0 = { "denormal-fp-math"="ieee,preserve-sign" }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
index 347e549e7cf56..04eafadf52ebf 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.is.fpclass.ll
@@ -1457,15 +1457,3 @@ define i1 @iszero_f32(float %x) nounwind {
   %1 = call i1 @llvm.is.fpclass.f32(float %x, i32 96)  ; 0x60 = "zero"
   ret i1 %1
 }
-
-declare i1 @llvm.is.fpclass.f32(float, i32)
-declare i1 @llvm.is.fpclass.f64(double, i32)
-declare <2 x i1> @llvm.is.fpclass.v2f32(<2 x float>, i32)
-declare <3 x i1> @llvm.is.fpclass.v3f32(<3 x float>, i32)
-declare <4 x i1> @llvm.is.fpclass.v4f32(<4 x float>, i32)
-declare <5 x i1> @llvm.is.fpclass.v5f32(<5 x float>, i32)
-declare <6 x i1> @llvm.is.fpclass.v6f32(<6 x float>, i32)
-declare <7 x i1> @llvm.is.fpclass.v7f32(<7 x float>, i32)
-declare <8 x i1> @llvm.is.fpclass.v8f32(<8 x float>, i32)
-declare <16 x i1> @llvm.is.fpclass.v16f32(<16 x float>, i32)
-declare <2 x i1> @llvm.is.fpclass.v2f64(<2 x double>, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
index dc7fe840f5118..f0104383ba9d4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.ldexp.ll
@@ -565,20 +565,4 @@ define <2 x half> @test_ldexp_v2f16_v2i16(<2 x half> %a, <2 x i16> %b) {
   ret <2 x half> %result
 }
 
-declare float @llvm.ldexp.f32.i32(float, i32) #0
-declare float @llvm.ldexp.f32.i16(float, i16) #0
-declare float @llvm.ldexp.f32.i64(float, i64) #0
-declare half @llvm.ldexp.f16.i8(half, i8) #0
-declare half @llvm.ldexp.f16.i16(half, i16) #0
-declare half @llvm.ldexp.f16.i32(half, i32) #0
-declare <2 x half> @llvm.ldexp.v2f16.v2i16(<2 x half>, <2 x i16>) #0
-declare <2 x half> @llvm.ldexp.v2f16.v2i32(<2 x half>, <2 x i32>) #0
-declare <2 x float> @llvm.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>) #0
-declare <2 x float> @llvm.ldexp.v2f32.v2i16(<2 x float>, <2 x i16>) #0
-declare <2 x float> @llvm.ldexp.v2f32.v2i64(<2 x float>, <2 x i64>) #0
-declare <3 x float> @llvm.ldexp.v3f32.v3i32(<3 x float>, <3 x i32>) #0
-declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>) #0
-declare double @llvm.ldexp.f64.i32(double, i32) #0
-declare <2 x double> @llvm.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>) #0
-
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
index d847af780acab..1a4a0d8f1b8e6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
@@ -7682,18 +7682,6 @@ define <4 x half> @v_log_v4f16_fast(<4 x half> %in) {
   ret <4 x half> %result
 }
 
-declare float @llvm.fabs.f32(float) #2
-declare float @llvm.log.f32(float) #2
-declare <2 x float> @llvm.log.v2f32(<2 x float>) #2
-declare <3 x float> @llvm.log.v3f32(<3 x float>) #2
-declare <4 x float> @llvm.log.v4f32(<4 x float>) #2
-declare half @llvm.fabs.f16(half) #2
-declare half @llvm.log.f16(half) #2
-declare <2 x half> @llvm.log.v2f16(<2 x half>) #2
-declare <3 x half> @llvm.log.v3f16(<3 x half>) #2
-declare <4 x half> @llvm.log.v4f16(<4 x half>) #2
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
-
 attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
 attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
 attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
index 3f060de9f6596..9aad7db25119f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
@@ -7682,18 +7682,6 @@ define <4 x half> @v_log10_v4f16_fast(<4 x half> %in) {
   ret <4 x half> %result
 }
 
-declare float @llvm.fabs.f32(float) #2
-declare float @llvm.log10.f32(float) #2
-declare <2 x float> @llvm.log10.v2f32(<2 x float>) #2
-declare <3 x float> @llvm.log10.v3f32(<3 x float>) #2
-declare <4 x float> @llvm.log10.v4f32(<4 x float>) #2
-declare half @llvm.fabs.f16(half) #2
-declare half @llvm.log10.f16(half) #2
-declare <2 x half> @llvm.log10.v2f16(<2 x half>) #2
-declare <3 x half> @llvm.log10.v3f16(<3 x half>) #2
-declare <4 x half> @llvm.log10.v4f16(<4 x half>) #2
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
-
 attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
 attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
 attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
index 035b2439eff15..f7bd61a945f0a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
@@ -3990,18 +3990,6 @@ define <4 x half> @v_log2_v4f16_fast(<4 x half> %in) {
   ret <4 x half> %result
 }
 
-declare float @llvm.fabs.f32(float) #2
-declare float @llvm.log2.f32(float) #2
-declare <2 x float> @llvm.log2.v2f32(<2 x float>) #2
-declare <3 x float> @llvm.log2.v3f32(<3 x float>) #2
-declare <4 x float> @llvm.log2.v4f32(<4 x float>) #2
-declare half @llvm.fabs.f16(half) #2
-declare half @llvm.log2.f16(half) #2
-declare <2 x half> @llvm.log2.v2f16(<2 x half>) #2
-declare <3 x half> @llvm.log2.v3f16(<3 x half>) #2
-declare <4 x half> @llvm.log2.v4f16(<4 x half>) #2
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
-
 attributes #0 = { "denormal-fp-math-f32"="ieee,preserve-sign" }
 attributes #1 = { "denormal-fp-math-f32"="dynamic,dynamic" }
 attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
index d056a97dc5444..4216f526b7640 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
@@ -5,11 +5,6 @@
 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX10 %s
 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GFX11 %s
 
-declare half @llvm.maxnum.f16(half %a, half %b)
-declare <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> %b)
-declare <3 x half> @llvm.maxnum.v3f16(<3 x half> %a, <3 x half> %b)
-declare <4 x half> @llvm.maxnum.v4f16(<4 x half> %a, <4 x half> %b)
-
 define amdgpu_kernel void @maxnum_f16(
 ; SI-LABEL: maxnum_f16:
 ; SI:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll b/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll
index 41e8762311306..7310f346cfb51 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll
@@ -1,10 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-declare void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) nocapture, ptr addrspace(3) nocapture, i32, i1) nounwind
-declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture, i64, i1) nounwind
-declare void @llvm.memcpy.p1.p2.i64(ptr addrspace(1) nocapture, ptr addrspace(4) nocapture, i64, i1) nounwind
-
 
 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
 ; SI-DAG: ds_read_u8
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
index f934a2de9247f..67d1472a0fc21 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
@@ -5,11 +5,6 @@
 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX10 %s
 ; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10PLUS,GFX11 %s
 
-declare half @llvm.minnum.f16(half %a, half %b)
-declare <2 x half> @llvm.minnum.v2f16(<2 x half> %a, <2 x half> %b)
-declare <3 x half> @llvm.minnum.v3f16(<3 x half> %a, <3 x half> %b)
-declare <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
-
 define amdgpu_kernel void @minnum_f16_ieee(
 ; SI-LABEL: minnum_f16_ieee:
 ; SI:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
index c3e665fa8269a..d7d8ecf4ae8b9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll
@@ -857,6 +857,3 @@ bb:
   %umulo = tail call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %i, i64 4)
   ret { i64, i1 } %umulo
 }
-
-declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64)
-declare { i64, i1 } @llvm.smul.with.overflow.i64(i64, i64)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll b/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll
index 4a03e4e3ea72c..357e337d0da1e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.pow-gfx9.ll
@@ -15,5 +15,3 @@ entry:
   store float %r.val, ptr addrspace(1) %r
   ret void
 }
-
-declare float @llvm.pow.f32(float ,float ) readonly
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.pow.ll b/llvm/test/CodeGen/AMDGPU/llvm.pow.ll
index 88b0526b174b9..3d4750134fb9d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.pow.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.pow.ll
@@ -32,7 +32,3 @@ define amdgpu_ps void @test2(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
    call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
    ret void
 }
-
-declare float @llvm.pow.f32(float ,float ) readonly
-declare <4 x float> @llvm.pow.v4f32(<4 x float> ,<4 x float> ) readonly
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll
index a467e29d0bff6..cf69a1b8a7881 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.powi.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.powi.ll
@@ -405,8 +405,4 @@ define float @v_powi_neg128_f32(float %l) {
 ;   ret double %res
 ; }
 
-declare half @llvm.powi.f16.i32(half, i32) #0
-declare float @llvm.powi.f32.i32(float, i32) #0
-declare double @llvm.powi.f64.i32(double, i32) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll b/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll
index 77fd0bc058aca..49432d863b23c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.prefetch.ll
@@ -251,10 +251,3 @@ entry:
   tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
   ret void
 }
-
-declare void @llvm.prefetch.pf(ptr nocapture readonly, i32, i32, i32)
-declare void @llvm.prefetch.p1(ptr addrspace(1) nocapture readonly, i32, i32, i32)
-declare void @llvm.prefetch.p3(ptr addrspace(3) nocapture readonly, i32, i32, i32)
-declare void @llvm.prefetch.p4(ptr addrspace(4) nocapture readonly, i32, i32, i32)
-declare void @llvm.prefetch.p5(ptr addrspace(5) nocapture readonly, i32, i32, i32)
-declare void @llvm.prefetch.p6(ptr addrspace(6) nocapture readonly, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
index 7ec7d9bdb400f..ff4a9d3427442 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
@@ -44,14 +44,9 @@ main_body:
 }
 
 ; Function Attrs: readnone
-declare <4 x float> @llvm.r600.cube(<4 x float>) #0
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.fabs.f32(float) #0
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
 
 ; Function Attrs: readnone
-declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
 
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll
index 3607e2328c71b..b3ba38c12b1b7 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=r600 -mcpu=redwood -verify-machineinstrs < %s
 
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) nounwind readnone
-
 define amdgpu_kernel void @test_dp4(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) nounwind {
   %src0 = load <4 x float>, ptr addrspace(1) %a, align 16
   %src1 = load <4 x float>, ptr addrspace(1) %b, align 16
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
index 704e6bb2e04b0..d79c6090ebbde 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
@@ -18,13 +18,10 @@ entry:
 }
 
 ; Function Attrs: convergent nounwind
-declare void @llvm.r600.group.barrier() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #2
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.local.size.x() #2
 
 attributes #0 = { nounwind }
 attributes #1 = { convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
index ab035b9de04b9..57f3306de2f60 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
@@ -171,8 +171,4 @@ entry:
   ret void
 }
 
-declare i32 @llvm.r600.read.local.size.x() #0
-declare i32 @llvm.r600.read.local.size.y() #0
-declare i32 @llvm.r600.read.local.size.z() #0
-
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll
index 07010c85389e8..82c7288ec7317 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s
 
-declare float @llvm.r600.recipsqrt.clamped.f32(float) nounwind readnone
-
 ; EG-LABEL: {{^}}rsq_clamped_f32:
 ; EG: RECIPSQRT_CLAMPED
 define amdgpu_kernel void @rsq_clamped_f32(ptr addrspace(1) %out, float %src) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll
index ba261e26d63c9..dd6c45771f58c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG %s
 
-declare float @llvm.r600.recipsqrt.ieee.f32(float) nounwind readnone
-
 ; EG-LABEL: {{^}}recipsqrt.ieee_f32:
 ; EG: RECIPSQRT_IEEE
 define amdgpu_kernel void @recipsqrt.ieee_f32(ptr addrspace(1) %out, float %src) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
index e670e78954c86..62e1a3f950368 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
@@ -57,9 +57,7 @@ bb:
 }
 
 ; Function Attrs: readnone
-declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
 
 ; Function Attrs: readnone
-declare <4 x float> @llvm.r600.texc(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
 
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
index 84afa3b0096ea..863948ee39f70 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.rint.f16.ll
@@ -4,9 +4,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX89,GFX9 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11 %s
 
-declare half @llvm.rint.f16(half %a)
-declare <2 x half> @llvm.rint.v2f16(<2 x half> %a)
-
 define amdgpu_kernel void @rint_f16(
 ; SI-LABEL: rint_f16:
 ; SI:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll
index 28781ae9f13c7..dc02819697a44 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.rint.f64.ll
@@ -39,8 +39,3 @@ entry:
   store <4 x double> %0, ptr addrspace(1) %out
   ret void
 }
-
-
-declare double @llvm.rint.f64(double) #0
-declare <2 x double> @llvm.rint.v2f64(<2 x double>) #0
-declare <4 x double> @llvm.rint.v4f64(<4 x double>) #0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.rint.ll b/llvm/test/CodeGen/AMDGPU/llvm.rint.ll
index 58a7771c23a34..6e8ae6109ac8e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.rint.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.rint.ll
@@ -43,8 +43,4 @@ entry:
   ret void
 }
 
-declare float @llvm.rint.f32(float) #0
-declare <2 x float> @llvm.rint.v2f32(<2 x float>) #0
-declare <4 x float> @llvm.rint.v4f32(<4 x float>) #0
-
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
index ddbc5ef4e5b60..a8f5e7f228a96 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.f64.ll
@@ -661,12 +661,5 @@ define amdgpu_kernel void @round_v8f64(ptr addrspace(1) %out, <8 x double> %in)
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-declare double @llvm.round.f64(double) #1
-declare <2 x double> @llvm.round.v2f64(<2 x double>) #1
-declare <4 x double> @llvm.round.v4f64(<4 x double>) #1
-declare <8 x double> @llvm.round.v8f64(<8 x double>) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.round.ll b/llvm/test/CodeGen/AMDGPU/llvm.round.ll
index 7ad7cc821c1b5..f9c82ed6faf8b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.round.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.round.ll
@@ -951,15 +951,5 @@ define amdgpu_kernel void @round_v2f16(ptr addrspace(1) %out, i32 %in.arg) #0 {
   ret void
 }
 
-declare float @llvm.round.f32(float) #1
-declare <2 x float> @llvm.round.v2f32(<2 x float>) #1
-declare <4 x float> @llvm.round.v4f32(<4 x float>) #1
-declare <8 x float> @llvm.round.v8f32(<8 x float>) #1
-
-declare half @llvm.round.f16(half) #1
-declare <2 x half> @llvm.round.v2f16(<2 x half>) #1
-declare <4 x half> @llvm.round.v4f16(<4 x half>) #1
-declare <8 x half> @llvm.round.v8f16(<8 x half>) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
index 2ce0a628686ea..488aa58f11503 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.f16.ll
@@ -197,6 +197,3 @@ define amdgpu_kernel void @sin_v2f16(ptr addrspace(1) %r, ptr addrspace(1) %a) {
   store <2 x half> %r.val, ptr addrspace(1) %r
   ret void
 }
-
-declare half @llvm.sin.f16(half %a)
-declare <2 x half> @llvm.sin.v2f16(<2 x half> %a)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
index 576ed270183f6..607d00f269562 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
@@ -170,9 +170,6 @@ define amdgpu_kernel void @sin_v4f32(ptr addrspace(1) %out, <4 x float> %vx) #1
    ret void
 }
 
-declare float @llvm.sin.f32(float) #0
-declare <4 x float> @llvm.sin.v4f32(<4 x float>) #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind "unsafe-fp-math"="false" }
 attributes #2 = { nounwind "unsafe-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
index f2d57ba902e73..2927a369745cb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sqrt.f16.ll
@@ -3,9 +3,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
 
-declare half @llvm.sqrt.f16(half %a)
-declare <2 x half> @llvm.sqrt.v2f16(<2 x half> %a)
-
 define amdgpu_kernel void @sqrt_f16(
 ; SI-LABEL: sqrt_f16:
 ; SI:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
index d1e2ddcdc6eac..dfe72179ec5a2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.trunc.f16.ll
@@ -3,9 +3,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
 
-declare half @llvm.trunc.f16(half %a)
-declare <2 x half> @llvm.trunc.v2f16(<2 x half> %a)
-
 define amdgpu_kernel void @trunc_f16(
 ; SI-LABEL: trunc_f16:
 ; SI:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/load-input-fold.ll b/llvm/test/CodeGen/AMDGPU/load-input-fold.ll
index a243e6f538722..a80817829ba0e 100644
--- a/llvm/test/CodeGen/AMDGPU/load-input-fold.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-input-fold.ll
@@ -95,12 +95,8 @@ main_body:
 }
 
 ; Function Attrs: readnone
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
 
 ; Function Attrs: nounwind readonly
-declare float @llvm.pow.f32(float, float) #3
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
 
 attributes #1 = { readnone }
 attributes #2 = { readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll b/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll
index 8d148b56c280e..b80b7e5a33257 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-redundant-copies.ll
@@ -117,7 +117,3 @@ define amdgpu_vs void @test_3(i32 inreg %arg1, i32 inreg %arg2, ptr addrspace(8)
 
   ret void
 }
-
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll b/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll
index 8386a685a1a12..2b39bea88c4ec 100644
--- a/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-memory.amdgcn.ll
@@ -105,9 +105,6 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare void @llvm.amdgcn.s.barrier() #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll b/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
index 7db27f5338fed..9ea9905985624 100644
--- a/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
@@ -79,9 +79,6 @@ entry:
   ret void
 }
 
-declare i32 @llvm.r600.read.tidig.x() #1
-declare void @llvm.r600.group.barrier() #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
index 52f97150e4b30..e00d6875a34bd 100644
--- a/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-stack-alloc-block-sp-reference.ll
@@ -325,6 +325,4 @@ entry:
   ret void
 }
 
-declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture writeonly, i8, i32, i1 immarg) #0
-
 attributes #0 = { argmemonly nounwind willreturn writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll b/llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll
index cc90d03e66715..c12d11c9a532c 100644
--- a/llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll
+++ b/llvm/test/CodeGen/AMDGPU/long-branch-reserve-register.ll
@@ -6,9 +6,6 @@
 
 ; Used to emit an always 4 byte instruction. Inline asm always assumes
 ; each instruction is the maximum size.
-declare void @llvm.amdgcn.s.sleep(i32) #0
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 
 define amdgpu_kernel void @uniform_conditional_max_short_forward_branch(ptr addrspace(1) %arg, i32 %cnd) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/loop-prefetch.ll b/llvm/test/CodeGen/AMDGPU/loop-prefetch.ll
index 028758b99ca7f..92b3f12ec6e82 100644
--- a/llvm/test/CodeGen/AMDGPU/loop-prefetch.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop-prefetch.ll
@@ -384,5 +384,3 @@ bb6:
   tail call void @llvm.amdgcn.s.sleep(i32 0)
   br i1 %tmp3, label %bb1, label %bb2
 }
-
-declare void @llvm.amdgcn.s.sleep(i32)
diff --git a/llvm/test/CodeGen/AMDGPU/loop_break.ll b/llvm/test/CodeGen/AMDGPU/loop_break.ll
index 634390ba33caf..5247843d4c1bb 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_break.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_break.ll
@@ -538,7 +538,5 @@ bb9:                                              ; preds = %Flow
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
index a407cd20bf762..1463857df9583 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/loop_exit_with_xor.ll
@@ -146,6 +146,4 @@ loopexit:
   ret void
 }
 
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
index 85cc810988a71..3f0a39c13debf 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
@@ -425,8 +425,6 @@ define <4 x ptr addrspace(7)> @shufflenvector(<2 x ptr addrspace(7)> %a, <2 x pt
   ret <4 x ptr addrspace(7)> %ret
 }
 
-declare ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7), i32)
-
 define ptr addrspace(7) @ptrmask(ptr addrspace(7) %p, i32 %mask) {
 ; CHECK-LABEL: define { ptr addrspace(8), i32 } @ptrmask
 ; CHECK-SAME: ({ ptr addrspace(8), i32 } [[P:%.*]], i32 [[MASK:%.*]]) #[[ATTR0]] {
@@ -441,9 +439,6 @@ define ptr addrspace(7) @ptrmask(ptr addrspace(7) %p, i32 %mask) {
   ret ptr addrspace(7) %ret
 }
 
-declare ptr @llvm.invariant.start.p7(i64, ptr addrspace(7) nocapture)
-declare void @llvm.invariant.end.p7(ptr, i64, ptr addrspace(7) nocapture)
-
 define i32 @invariant_start_end(ptr addrspace(7) %p) {
 ; CHECK-LABEL: define i32 @invariant_start_end
 ; CHECK-SAME: ({ ptr addrspace(8), i32 } [[P:%.*]]) #[[ATTR0]] {
@@ -460,9 +455,6 @@ define i32 @invariant_start_end(ptr addrspace(7) %p) {
   ret i32 %v
 }
 
-declare ptr addrspace(7) @llvm.launder.invariant.group.p7(ptr addrspace(7) nocapture)
-declare ptr addrspace(7) @llvm.strip.invariant.group.p7(ptr addrspace(7) nocapture)
-
 define ptr addrspace(7) @invariant_group(ptr addrspace(7) %p) {
 ; CHECK-LABEL: define { ptr addrspace(8), i32 } @invariant_group
 ; CHECK-SAME: ({ ptr addrspace(8), i32 } [[P:%.*]]) #[[ATTR0]] {
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll
index 69387e67c1c7e..7813d973e8540 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll
@@ -83,7 +83,6 @@ define float @debug_stash_pointer(ptr addrspace(8) %buf, i32 %idx, ptr addrspace
 }
 
 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare void @llvm.dbg.value(metadata, metadata, metadata) #0
 
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 
diff --git a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
index cb3ea2e812770..903fc5a79ef14 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-lds-struct-aa-memcpy.ll
@@ -63,8 +63,6 @@ entry:
   ret void
 }
 
-declare void @llvm.memcpy.p3.p3.i64(ptr addrspace(3) noalias nocapture writeonly, ptr addrspace(3) noalias nocapture readonly, i64, i1 immarg) #1
-
 ;.
 ; CHECK: attributes #[[ATTR0]] = { "amdgpu-lds-size"="7" }
 ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
diff --git a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll
index e9d42dc70cbb9..3dd4181895e6f 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics-threshold.ll
@@ -102,6 +102,4 @@ define amdgpu_kernel void @memset_size_8(ptr addrspace(1) %dst, i8 %val) {
   ret void
 }
 
-declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture writeonly, i8, i64, i1 immarg) #0
-
 attributes #0 = { argmemonly nounwind willreturn writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
index 0f4e790a6976f..ba13e82089415 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-mem-intrinsics.ll
@@ -6,29 +6,6 @@
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -pre-isel-intrinsic-lowering -mem-intrinsic-expand-size=1024 %s | FileCheck -check-prefixes=OPT,MAX1024 %s
 ; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -pre-isel-intrinsic-lowering -mem-intrinsic-expand-size=0 %s | FileCheck -check-prefixes=OPT,ALL %s
 
-declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) #1
-declare void @llvm.memcpy.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
-declare void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) nocapture, ptr addrspace(1) nocapture readonly, i32, i1) #1
-declare void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1
-declare void @llvm.memcpy.p3.p3.i32(ptr addrspace(3) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
-
-declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) nocapture, ptr addrspace(1) nocapture readonly, i64, i1) #1
-declare void @llvm.memmove.p1.p3.i32(ptr addrspace(1) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
-declare void @llvm.memmove.p5.p5.i32(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1
-declare void @llvm.memmove.p3.p5.i32(ptr addrspace(3) nocapture, ptr addrspace(5) nocapture readonly, i32, i1) #1
-declare void @llvm.memmove.p5.p3.i32(ptr addrspace(5) nocapture, ptr addrspace(3) nocapture readonly, i32, i1) #1
-declare void @llvm.memmove.p0.p1.i64(ptr nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p1.p0.i64(ptr addrspace(1) nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p5.p1.i64(ptr addrspace(5) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p1.p5.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p0.p5.i64(ptr nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p5.p0.i64(ptr addrspace(5) nocapture writeonly, ptr nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p1.p999.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(999) nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p999.p1.i64(ptr addrspace(999) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg) #1
-declare void @llvm.memmove.p999.p998.i64(ptr addrspace(999) nocapture writeonly, ptr addrspace(998) nocapture readonly, i64, i1 immarg) #1
-
-declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture, i8, i64, i1) #1
-
 ; Test the upper bound for sizes to leave
 define amdgpu_kernel void @max_size_small_static_memcpy_caller0(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
 ; MAX1024-LABEL: @max_size_small_static_memcpy_caller0(
@@ -266,7 +243,7 @@ define amdgpu_kernel void @memcpy_multi_use_one_function(ptr addrspace(1) %dst0,
 ; OPT-NEXT:    [[TMP3:%.*]] = sub i64 [[N]], [[TMP2]]
 ; OPT-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[TMP1]], 0
 ; OPT-NEXT:    br i1 [[TMP4]], label [[LOOP_MEMCPY_EXPANSION2:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER5:%.*]]
-; OPT:       loop-memcpy-expansion2:
+; OPT:       loop-memcpy-expansion:
 ; OPT-NEXT:    [[LOOP_INDEX3:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[LOOP_MEMCPY_EXPANSION2]] ]
 ; OPT-NEXT:    [[TMP5:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC:%.*]], i64 [[LOOP_INDEX3]]
 ; OPT-NEXT:    [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP5]], align 1
@@ -275,7 +252,7 @@ define amdgpu_kernel void @memcpy_multi_use_one_function(ptr addrspace(1) %dst0,
 ; OPT-NEXT:    [[TMP8]] = add i64 [[LOOP_INDEX3]], 1
 ; OPT-NEXT:    [[TMP9:%.*]] = icmp ult i64 [[TMP8]], [[TMP1]]
 ; OPT-NEXT:    br i1 [[TMP9]], label [[LOOP_MEMCPY_EXPANSION2]], label [[LOOP_MEMCPY_RESIDUAL_HEADER5]]
-; OPT:       loop-memcpy-residual4:
+; OPT:       loop-memcpy-residual:
 ; OPT-NEXT:    [[RESIDUAL_LOOP_INDEX6:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER5]] ], [ [[TMP14:%.*]], [[LOOP_MEMCPY_RESIDUAL4:%.*]] ]
 ; OPT-NEXT:    [[TMP10:%.*]] = add i64 [[TMP3]], [[RESIDUAL_LOOP_INDEX6]]
 ; OPT-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP10]]
@@ -285,13 +262,13 @@ define amdgpu_kernel void @memcpy_multi_use_one_function(ptr addrspace(1) %dst0,
 ; OPT-NEXT:    [[TMP14]] = add i64 [[RESIDUAL_LOOP_INDEX6]], 1
 ; OPT-NEXT:    [[TMP15:%.*]] = icmp ult i64 [[TMP14]], [[TMP2]]
 ; OPT-NEXT:    br i1 [[TMP15]], label [[LOOP_MEMCPY_RESIDUAL4]], label [[POST_LOOP_MEMCPY_EXPANSION1:%.*]]
-; OPT:       post-loop-memcpy-expansion1:
+; OPT:       post-loop-memcpy-expansion:
 ; OPT-NEXT:    [[TMP16:%.*]] = lshr i64 [[M:%.*]], 4
 ; OPT-NEXT:    [[TMP17:%.*]] = and i64 [[M]], 15
 ; OPT-NEXT:    [[TMP18:%.*]] = sub i64 [[M]], [[TMP17]]
 ; OPT-NEXT:    [[TMP19:%.*]] = icmp ne i64 [[TMP16]], 0
 ; OPT-NEXT:    br i1 [[TMP19]], label [[LOOP_MEMCPY_EXPANSION:%.*]], label [[LOOP_MEMCPY_RESIDUAL_HEADER:%.*]]
-; OPT:       loop-memcpy-expansion:
+; OPT:       loop-memcpy-expansion2:
 ; OPT-NEXT:    [[LOOP_INDEX:%.*]] = phi i64 [ 0, [[POST_LOOP_MEMCPY_EXPANSION1]] ], [ [[TMP23:%.*]], [[LOOP_MEMCPY_EXPANSION]] ]
 ; OPT-NEXT:    [[TMP20:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[SRC]], i64 [[LOOP_INDEX]]
 ; OPT-NEXT:    [[TMP21:%.*]] = load <4 x i32>, ptr addrspace(1) [[TMP20]], align 1
@@ -300,7 +277,7 @@ define amdgpu_kernel void @memcpy_multi_use_one_function(ptr addrspace(1) %dst0,
 ; OPT-NEXT:    [[TMP23]] = add i64 [[LOOP_INDEX]], 1
 ; OPT-NEXT:    [[TMP24:%.*]] = icmp ult i64 [[TMP23]], [[TMP16]]
 ; OPT-NEXT:    br i1 [[TMP24]], label [[LOOP_MEMCPY_EXPANSION]], label [[LOOP_MEMCPY_RESIDUAL_HEADER]]
-; OPT:       loop-memcpy-residual:
+; OPT:       loop-memcpy-residual4:
 ; OPT-NEXT:    [[RESIDUAL_LOOP_INDEX:%.*]] = phi i64 [ 0, [[LOOP_MEMCPY_RESIDUAL_HEADER]] ], [ [[TMP29:%.*]], [[LOOP_MEMCPY_RESIDUAL:%.*]] ]
 ; OPT-NEXT:    [[TMP25:%.*]] = add i64 [[TMP18]], [[RESIDUAL_LOOP_INDEX]]
 ; OPT-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[SRC]], i64 [[TMP25]]
@@ -310,14 +287,14 @@ define amdgpu_kernel void @memcpy_multi_use_one_function(ptr addrspace(1) %dst0,
 ; OPT-NEXT:    [[TMP29]] = add i64 [[RESIDUAL_LOOP_INDEX]], 1
 ; OPT-NEXT:    [[TMP30:%.*]] = icmp ult i64 [[TMP29]], [[TMP17]]
 ; OPT-NEXT:    br i1 [[TMP30]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION:%.*]]
-; OPT:       post-loop-memcpy-expansion:
+; OPT:       post-loop-memcpy-expansion1:
 ; OPT-NEXT:    ret void
 ; OPT:       loop-memcpy-residual-header:
-; OPT-NEXT:    [[TMP31:%.*]] = icmp ne i64 [[TMP17]], 0
-; OPT-NEXT:    br i1 [[TMP31]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
+; OPT-NEXT:    [[TMP31:%.*]] = icmp ne i64 [[TMP2]], 0
+; OPT-NEXT:    br i1 [[TMP31]], label [[LOOP_MEMCPY_RESIDUAL4]], label [[POST_LOOP_MEMCPY_EXPANSION1]]
 ; OPT:       loop-memcpy-residual-header5:
-; OPT-NEXT:    [[TMP32:%.*]] = icmp ne i64 [[TMP2]], 0
-; OPT-NEXT:    br i1 [[TMP32]], label [[LOOP_MEMCPY_RESIDUAL4]], label [[POST_LOOP_MEMCPY_EXPANSION1]]
+; OPT-NEXT:    [[TMP32:%.*]] = icmp ne i64 [[TMP17]], 0
+; OPT-NEXT:    br i1 [[TMP32]], label [[LOOP_MEMCPY_RESIDUAL]], label [[POST_LOOP_MEMCPY_EXPANSION]]
 ;
   call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst0, ptr addrspace(1) %src, i64 %n, i1 false)
   call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) %dst1, ptr addrspace(1) %src, i64 %m, i1 false)
@@ -1773,7 +1750,5 @@ entry:
   ret void
 }
 
-declare i64 @llvm.umin.i64(i64, i64)
-
 attributes #0 = { nounwind }
 attributes #1 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll b/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll
index ed7bd22eea4b1..c943a044399bd 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-range-metadata-intrinsic-call.ll
@@ -36,9 +36,6 @@ entry:
   ret void
 }
 
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { norecurse nounwind "amdgpu-flat-work-group-size"="1,256" }
 attributes #1 = { nounwind readnone }
 
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
index 9547f08d3eba6..b3b27eaf95bf1 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-hsa.ll
@@ -286,11 +286,6 @@ define void @workgroup_ids_device_func(ptr addrspace(1) %outx, ptr addrspace(1)
   ret void
 }
 
-declare i32 @llvm.amdgcn.workgroup.id.x()
-declare i32 @llvm.amdgcn.workgroup.id.y()
-declare i32 @llvm.amdgcn.workgroup.id.z()
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
-
 attributes #0 = { nounwind "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX9ARCH: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
index cfff0a969da9e..07f2227d4de82 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-work-group-id-intrinsics-pal.ll
@@ -177,11 +177,6 @@ define amdgpu_gfx void @workgroup_ids_gfx(ptr addrspace(1) %outx, ptr addrspace(
   store volatile i32 %id.z, ptr addrspace(1) %outz
   ret void
 }
-
-declare i32 @llvm.amdgcn.workgroup.id.x()
-declare i32 @llvm.amdgcn.workgroup.id.y()
-declare i32 @llvm.amdgcn.workgroup.id.z()
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32>, ptr addrspace(8), i32, i32, i32 immarg)
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX9-GISEL: {{.*}}
 ; GFX9-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll b/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
index 7830bfc6ac7f5..7d17fc66da976 100644
--- a/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
+++ b/llvm/test/CodeGen/AMDGPU/lshl64-to-32.ll
@@ -142,5 +142,3 @@ bb:
   store i64 %tmp8, ptr addrspace(1) %tmp9, align 8
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll b/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
index 994ef22539a65..6bffe9e99e732 100644
--- a/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
@@ -706,7 +706,5 @@ define amdgpu_kernel void @lshr_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
index 6672568b98a20..0e955e2f087b5 100644
--- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
+++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll
@@ -1074,7 +1074,6 @@ define protected amdgpu_kernel void @kernel_round1_short(ptr addrspace(1) nocapt
 }
 
 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare i64 @llvm.fshl.i64(i64, i64, i64) #3
 
 attributes #0 = { convergent mustprogress nofree nounwind willreturn memory(none) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" }
 attributes #1 = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="gfx1030" "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32" }
diff --git a/llvm/test/CodeGen/AMDGPU/mad-combine.ll b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
index e94aa4b8ce3d1..835b1bd83d806 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
@@ -10,11 +10,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-FASTFMAF -check-prefix=FUNC %s
 ; RUN: llc -mtriple=amdgcn -mcpu=verde -denormal-fp-math-f32=ieee -fp-contract=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=SI-DENORM -check-prefix=SI-DENORM-SLOWFMAF -check-prefix=FUNC %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.fma.f32(float, float, float) #0
-declare float @llvm.fmuladd.f32(float, float, float) #0
-
 ; (fadd (fmul x, y), z) -> (fma x, y, z)
 ; FUNC-LABEL: {{^}}combine_to_mad_f32_0:
 ; SI-DAG: buffer_load_dword [[A:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 glc{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
index 819b6ca98b3a8..7ff3e0dd83f7e 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-hi.ll
@@ -519,12 +519,5 @@ define <2 x half> @v_mad_mixhi_f16_f16lo_f16lo_f16lo_undeflo_clamp_postcvt_multi
   ret <2 x half> %vec.result
 }
 
-declare half @llvm.minnum.f16(half, half) #1
-declare half @llvm.maxnum.f16(half, half) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
-
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
index 5b7f0e72b70da..d8183ac42dc7c 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll
@@ -2504,32 +2504,5 @@ define half @mixlo_fptrunc_neg_src_mod(float %a, float %b) #0 {
   ret half %trunc
 }
 
-declare float @llvm.fabs.f32(float) #1
-
-declare half @llvm.minnum.f16(half, half) #1
-declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
-declare <3 x half> @llvm.minnum.v3f16(<3 x half>, <3 x half>) #1
-declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>) #1
-
-declare half @llvm.maxnum.f16(half, half) #1
-declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
-declare <3 x half> @llvm.maxnum.v3f16(<3 x half>, <3 x half>) #1
-declare <4 x half> @llvm.maxnum.v4f16(<4 x half>, <4 x half>) #1
-
-declare float @llvm.minnum.f32(float, float) #1
-declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 x float>) #1
-declare <3 x float> @llvm.minnum.v3f32(<3 x float>, <3 x float>) #1
-declare <4 x float> @llvm.minnum.v4f32(<4 x float>, <4 x float>) #1
-
-declare float @llvm.maxnum.f32(float, float) #1
-declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) #1
-declare <3 x float> @llvm.maxnum.v3f32(<3 x float>, <3 x float>) #1
-declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) #1
-
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
-declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) #1
-declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
-
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
index b520dd1060ec8..adce7b030b264 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
@@ -2622,14 +2622,6 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg,
   ret float %result
 }
 
-declare half @llvm.fabs.f16(half) #2
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2
-declare float @llvm.fabs.f32(float) #2
-declare float @llvm.minnum.f32(float, float) #2
-declare float @llvm.maxnum.f32(float, float) #2
-declare float @llvm.fmuladd.f32(float, float, float) #2
-declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2
-
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind "denormal-fp-math-f32"="ieee,ieee" }
 attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/mad.u16.ll b/llvm/test/CodeGen/AMDGPU/mad.u16.ll
index 995c8c8679397..e6b59b84567db 100644
--- a/llvm/test/CodeGen/AMDGPU/mad.u16.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad.u16.ll
@@ -338,7 +338,5 @@ define amdgpu_ps i32 @s_mad_u16_sext(i16 inreg %arg0, i16 inreg %arg1, i16 inreg
   %sext = sext i16 %add to i32
   ret i32 %sext
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/mad24-get-global-id.ll b/llvm/test/CodeGen/AMDGPU/mad24-get-global-id.ll
index 620566d3baff3..4b789ee4e6782 100644
--- a/llvm/test/CodeGen/AMDGPU/mad24-get-global-id.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad24-get-global-id.ll
@@ -3,10 +3,6 @@
 ; If the workgroup id range is restricted, we should be able to use
 ; mad24 for the usual indexing pattern.
 
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-
 ; GCN-LABEL: {{^}}get_global_id_0:
 ; GCN: s_and_b32 [[WGSIZEX:s[0-9]+]], {{s[0-9]+}}, 0xffff
 ; GCN: s_mul_i32 [[MUL:s[0-9]+]], s8, [[WGSIZEX]]
diff --git a/llvm/test/CodeGen/AMDGPU/mad_int24.ll b/llvm/test/CodeGen/AMDGPU/mad_int24.ll
index eed4c2e4574b6..92aed2025852e 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_int24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_int24.ll
@@ -117,5 +117,3 @@ bb6:                                              ; preds = %bb6, %bb
   %tmp28 = icmp eq i32 %tmp27, %arg1
   br i1 %tmp28, label %bb5, label %bb6
 }
-
-declare i32 @llvm.amdgcn.mul.i24(i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
index a71b083d22e2b..d0a26cca8082c 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
@@ -4,8 +4,6 @@
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2
 ; RUN: llc < %s -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 ; FUNC-LABEL: {{^}}u32_mad24:
 ; EG: MULLO_INT
 ; SI: s_mul_i32
diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll
index 9ec37a5e14cdf..9666220d4c2ed 100644
--- a/llvm/test/CodeGen/AMDGPU/madak.ll
+++ b/llvm/test/CodeGen/AMDGPU/madak.ll
@@ -9,9 +9,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefixes=GFX10-FMA %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefixes=GFX11-FMA %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare float @llvm.fabs.f32(float) nounwind readnone
-
 define amdgpu_kernel void @madak_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in.a, ptr addrspace(1) noalias %in.b) #0 {
 ; GFX6-LABEL: madak_f32:
 ; GFX6:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/madmk.ll b/llvm/test/CodeGen/AMDGPU/madmk.ll
index d5ef866dc8a85..10eaf1d4bfba1 100644
--- a/llvm/test/CodeGen/AMDGPU/madmk.ll
+++ b/llvm/test/CodeGen/AMDGPU/madmk.ll
@@ -5,9 +5,6 @@
  ; possible, but requires the correct registers to be used which is
  ; hard to trigger.
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare float @llvm.fabs.f32(float) nounwind readnone
-
 ; GCN-LABEL: {{^}}madmk_f32:
 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 glc{{$}}
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
@@ -212,7 +209,5 @@ bb6:                                              ; preds = %bb2
   br label %bb2
 }
 
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/make-buffer-rsrc-lds-fails.ll b/llvm/test/CodeGen/AMDGPU/make-buffer-rsrc-lds-fails.ll
index 0679686f77eef..9fe7c6fadea07 100644
--- a/llvm/test/CodeGen/AMDGPU/make-buffer-rsrc-lds-fails.ll
+++ b/llvm/test/CodeGen/AMDGPU/make-buffer-rsrc-lds-fails.ll
@@ -6,4 +6,3 @@ define amdgpu_ps ptr addrspace(8) @basic_raw_buffer(ptr addrspace(3) inreg %p) {
   %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p3(ptr addrspace(3) %p, i16 0, i32 1234, i32 5678)
   ret ptr addrspace(8) %rsrc
 }
-declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p3(ptr addrspace(3) nocapture readnone, i16, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/max-literals.ll b/llvm/test/CodeGen/AMDGPU/max-literals.ll
index f1cbf43aa089a..5f38c830b2c54 100644
--- a/llvm/test/CodeGen/AMDGPU/max-literals.ll
+++ b/llvm/test/CodeGen/AMDGPU/max-literals.ll
@@ -59,8 +59,5 @@ main_body:
 }
 
 ; Function Attrs: readnone
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
 
 attributes #1 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/max.i16.ll b/llvm/test/CodeGen/AMDGPU/max.i16.ll
index 8ef2ca2765e8a..a130a76ee1bb0 100644
--- a/llvm/test/CodeGen/AMDGPU/max.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/max.i16.ll
@@ -417,5 +417,3 @@ define amdgpu_kernel void @v_test_umax_ugt_v2i16(ptr addrspace(1) %out, ptr addr
   store <2 x i16> %val, ptr addrspace(1) %outgep, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/max.ll b/llvm/test/CodeGen/AMDGPU/max.ll
index bef9ff82aa396..75923097ba274 100644
--- a/llvm/test/CodeGen/AMDGPU/max.ll
+++ b/llvm/test/CodeGen/AMDGPU/max.ll
@@ -991,7 +991,5 @@ define amdgpu_kernel void @test_imax_sge_i64(ptr addrspace(1) %out, i64 %a, i64
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/max3.ll b/llvm/test/CodeGen/AMDGPU/max3.ll
index a757bb068cf8d..f60b1a6ba3ae1 100644
--- a/llvm/test/CodeGen/AMDGPU/max3.ll
+++ b/llvm/test/CodeGen/AMDGPU/max3.ll
@@ -260,7 +260,5 @@ define amdgpu_kernel void @v_test_umax3_ugt_i64(ptr addrspace(1) %out, ptr addrs
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/med3-knownbits.ll b/llvm/test/CodeGen/AMDGPU/med3-knownbits.ll
index 8953d370b44fe..3fd6013989e02 100644
--- a/llvm/test/CodeGen/AMDGPU/med3-knownbits.ll
+++ b/llvm/test/CodeGen/AMDGPU/med3-knownbits.ll
@@ -2,11 +2,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti -amdgpu-codegenprepare-mul24=0 -amdgpu-codegenprepare-disable-idiv-expansion < %s | FileCheck -check-prefixes=SI,SI-SDAG %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti -amdgpu-codegenprepare-mul24=0 -amdgpu-codegenprepare-disable-idiv-expansion < %s | FileCheck -check-prefixes=SI,SI-GISEL %s
 
-declare i32 @llvm.smin.i32(i32, i32)
-declare i32 @llvm.smax.i32(i32, i32)
-declare i32 @llvm.umin.i32(i32, i32)
-declare i32 @llvm.umax.i32(i32, i32)
-
 ; Test computeKnownBits for umed3 node. We know the base address has a
 ; 0 sign bit only after umed3 is formed. The DS instruction offset can
 ; only be folded on SI with a positive base address.
diff --git a/llvm/test/CodeGen/AMDGPU/med3-no-simplify.ll b/llvm/test/CodeGen/AMDGPU/med3-no-simplify.ll
index 9d29b328346a9..436be9c6e1a2c 100644
--- a/llvm/test/CodeGen/AMDGPU/med3-no-simplify.ll
+++ b/llvm/test/CodeGen/AMDGPU/med3-no-simplify.ll
@@ -6,8 +6,6 @@
 ; -amdgpu-scalar-ir-passes=false flag, because InstSimplify would constant
 ; fold these functions otherwise.
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 ; GCN-LABEL: {{^}}v_test_umed3_r_i_i_constant_order_i32:
 ; GCN: v_max_u32_e32 v{{[0-9]+}}, 17, v{{[0-9]+}}
 ; GCN: v_min_u32_e32 v{{[0-9]+}}, 12, v{{[0-9]+}}
diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
index 1dd18b4228fe5..77f710d1b7ed6 100644
--- a/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
+++ b/llvm/test/CodeGen/AMDGPU/memcpy-crash-issue63986.ll
@@ -313,8 +313,5 @@ loop-memcpy-residual.preheader:                   ; preds = %loop-memcpy-residua
   br label %loop-memcpy-residual
 }
 
-declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
-declare i64 @llvm.umin.i64(i64, i64) #1
-
 attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
 attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll b/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll
index a5e0ceaa6b329..7b2efb8bd7265 100644
--- a/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll
+++ b/llvm/test/CodeGen/AMDGPU/memcpy-fixed-align.ll
@@ -54,6 +54,4 @@ define void @memcpy_fixed_align(ptr addrspace(5)  %dst, ptr addrspace(1) %src) {
   ret void
 }
 
-declare void @llvm.memcpy.p5.p1.i64(ptr addrspace(5) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg) #0
-
 attributes #0 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-inline-fails.ll b/llvm/test/CodeGen/AMDGPU/memcpy-inline-fails.ll
index c1a0bfb48e5f2..ecfa177761dad 100644
--- a/llvm/test/CodeGen/AMDGPU/memcpy-inline-fails.ll
+++ b/llvm/test/CodeGen/AMDGPU/memcpy-inline-fails.ll
@@ -2,8 +2,6 @@
 ; RUN: llc < %s -mtriple=r600-unknown-linux-gnu 2> %t.err || true	
 ; RUN: FileCheck --input-file %t.err %s
 
-declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
-
 define void @test1(ptr %a, ptr %b) nounwind {
 ; CHECK: LLVM ERROR
   tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %a, ptr %b, i64 8, i1 0 )
diff --git a/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll b/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll
index ce3bd34cc1b4a..84083de4feb2c 100644
--- a/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll
+++ b/llvm/test/CodeGen/AMDGPU/memcpy-scoped-aa.ll
@@ -89,11 +89,6 @@ define i32 @test_memset(ptr addrspace(1) nocapture %p, ptr addrspace(1) nocaptur
   ret i32 %add
 }
 
-declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg)
-declare void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noalias nocapture writeonly, ptr addrspace(1) noalias nocapture readonly, i64, i1 immarg)
-declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(1) nocapture readonly, i64, i1 immarg)
-declare void @llvm.memset.p1.i64(ptr addrspace(1) nocapture writeonly, i8, i64, i1 immarg)
-
 !0 = distinct !{!0, !"bax"}
 !1 = distinct !{!1, !0, !"bax: %p"}
 !2 = !{!1}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
index 2e6a73bb2cc00..0e03b8575aa96 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-atomic-insert-end.mir
@@ -1,7 +1,6 @@
 # RUN: llc -mtriple=amdgcn -run-pass=si-memory-legalizer  %s -o - | FileCheck %s
 
 --- |
-  declare i32 @llvm.amdgcn.workitem.id.x() #0
 
   define amdgpu_kernel void @atomic_max_i32_noret(
       ptr addrspace(1) %out,
@@ -29,10 +28,6 @@
     ret void
   }
 
-  declare { i1, i64 } @llvm.amdgcn.if(i1)
-
-  declare void @llvm.amdgcn.end.cf(i64)
-
   attributes #0 = { nounwind readnone }
   attributes #1 = { nounwind "target-cpu"="gfx803" }
 
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
index 71ec5512c72de..7c400f384a978 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
@@ -77,7 +77,6 @@ entry:
 }
 
 !0 = !{i32 1}
-declare i32 @llvm.amdgcn.workitem.id.x()
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX12-CU: {{.*}}
 ; GFX12-WGP: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
index f6fac11d634c5..9e00ed311de26 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
@@ -850,4 +850,3 @@ entry:
 }
 
 !0 = !{i32 1}
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
index 735e541b9f7c9..57ff38ca26fe7 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
@@ -698,5 +698,3 @@ entry:
   store atomic volatile i32 %in, ptr %out syncscope("workgroup") release, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
index fa2fc3c423694..dcf3568f9131a 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
@@ -78,7 +78,6 @@ entry:
   ret void
 }
 !0 = !{i32 1}
-declare i32 @llvm.amdgcn.workitem.id.x()
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX12-CU: {{.*}}
 ; GFX12-WGP: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
index c95ff2c691a3c..19e73f8b1163b 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
@@ -833,4 +833,3 @@ entry:
 }
 
 !0 = !{i32 1}
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
index 263324ed5c03b..24b95592000c8 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
@@ -742,5 +742,3 @@ entry:
   store atomic volatile i32 %in, ptr addrspace(1) %out syncscope("workgroup") release, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
index 16ede2665400e..b3f5488a39852 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
@@ -897,4 +897,3 @@ entry:
 }
 
 !0 = !{i32 1}
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
index ac09f65c32f41..9840b6a39413a 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
@@ -714,5 +714,3 @@ entry:
   store atomic volatile i32 %in, ptr addrspace(3) %out syncscope("workgroup") release, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
index 18df16988d8e4..b271c1461e67c 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-multiple-mem-operands-nontemporal-1.mir
@@ -29,25 +29,18 @@
   }
 
   ; Function Attrs: convergent nounwind
-  declare { i1, i64 } @llvm.amdgcn.if(i1) #1
 
   ; Function Attrs: convergent nounwind
-  declare { i1, i64 } @llvm.amdgcn.else(i64) #1
 
   ; Function Attrs: convergent nounwind readnone
-  declare i64 @llvm.amdgcn.break(i64) #2
 
   ; Function Attrs: convergent nounwind readnone
-  declare i64 @llvm.amdgcn.if.break(i1, i64) #2
 
   ; Function Attrs: convergent nounwind readnone
-  declare i64 @llvm.amdgcn.else.break(i64, i64) #2
 
   ; Function Attrs: convergent nounwind
-  declare i1 @llvm.amdgcn.loop(i64) #1
 
   ; Function Attrs: convergent nounwind
-  declare void @llvm.amdgcn.end.cf(i64) #1
 
   attributes #0 = { "target-cpu"="gfx803" }
   attributes #1 = { convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
index 1ce9fc308af31..24a007868d55d 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
@@ -84,7 +84,6 @@ entry:
 }
 
 !0 = !{i32 1}
-declare i32 @llvm.amdgcn.workitem.id.x()
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX12-CU: {{.*}}
 ; GFX12-WGP: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
index 30296a9c3d896..3340b155b4cc2 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
@@ -1017,4 +1017,3 @@ entry:
 }
 
 !0 = !{i32 1}
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
index 216b1eccf3a69..b668733e0ae3e 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
@@ -614,5 +614,3 @@ entry:
   store volatile i32 %val, ptr addrspace(5) %out.gep
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.ll b/llvm/test/CodeGen/AMDGPU/memory_clause.ll
index 940287d44d8d1..726f6e2312e9c 100644
--- a/llvm/test/CodeGen/AMDGPU/memory_clause.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory_clause.ll
@@ -507,7 +507,3 @@ define amdgpu_kernel void @flat_scratch_load_clause(float %a, float %b, <8 x i32
   call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 1, float %valadd, float undef, float undef, float undef, i1 immarg true, i1 immarg true)
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg)
-declare <2 x float> @llvm.amdgcn.image.sample.2d.v2f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll b/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll
index bcd80a2a03b05..2a30c0b1502cd 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/merge-store-crash.ll
@@ -30,6 +30,4 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind willreturn writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
index 88cc5334d5c73..5a259ac2a5c72 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
+++ b/llvm/test/CodeGen/AMDGPU/merge-store-usedef.ll
@@ -17,7 +17,5 @@ define amdgpu_vs void @test1(i32 %v) #0 {
   ret void
 }
 
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind willreturn writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/merge-stores.ll b/llvm/test/CodeGen/AMDGPU/merge-stores.ll
index 79f15123f2b26..e5ca703366cce 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/merge-stores.ll
@@ -679,7 +679,5 @@ define amdgpu_kernel void @copy_v3f64_align4(ptr addrspace(1) noalias %out, ptr
   ret void
 }
 
-declare void @llvm.amdgcn.s.barrier() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-bf16-vgpr-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-bf16-vgpr-cd-select.ll
index 7bb09f6697b68..875be72598c02 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-bf16-vgpr-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-bf16-vgpr-cd-select.ll
@@ -1,21 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN %s
 
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16>, <2 x i16>, <32 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16>, <2 x i16>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16>, <2 x i16>, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16>, <2 x i16>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16>, <2 x i16>, <4 x float>, i32, i32, i32)
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16>, <4 x i16>, <32 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16>, <4 x i16>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x4bf16.1k(<4 x i16>, <4 x i16>, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x8bf16.1k(<4 x i16>, <4 x i16>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x16bf16.1k(<4 x i16>, <4 x i16>, <4 x float>, i32, i32, i32)
-declare <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double, double, <4 x double>, i32, i32, i32)
-declare double @llvm.amdgcn.mfma.f64.4x4x4f64(double, double, double, i32, i32, i32)
-declare <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32, i32, <16 x i32>, i32, i32, i32)
-declare <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32, i32, <4 x i32>, i32, i32, i32)
-
 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x2bf16:
 ; GCN:  v_mfma_f32_32x32x2bf16 v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
 define amdgpu_kernel void @test_mfma_f32_32x32x2bf16(ptr addrspace(1) %arg) {
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
index ba34c1bbe1d71..6a8a2aaa5c7aa 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-cd-select.ll
@@ -4,8 +4,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN,GFX90A %s
 
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
-
 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_vgpr:
 ; GFX908: v_mfma_f32_32x32x1{{.*}} a[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, a[{{[0-9:]+}}]
 ; GFX90A: v_mfma_f32_32x32x1{{.*}} v[{{[0-9:]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, v[{{[0-9:]+}}]
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
index d5aeff7e819dd..72e5bffa0a997 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-loop.ll
@@ -627,7 +627,4 @@ exit:
   ret void
 }
 
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll
index 8dbbab3c57f72..2043ffc9834be 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll
@@ -8,10 +8,6 @@
 ; Check that Dst and SrcC of MFMA instructions reading more than 4 registers as SrcC
 ; is either completely disjoint or exactly the same, but does not alias.
 
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32)
-
 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32:
 ; GREEDY: v_mfma_f32_32x32x1{{.*}} a[0:31], v{{[0-9]+}}, v{{[0-9]+}}, a[0:31]
 ; GREEDY: v_mfma_f32_32x32x1{{.*}} a[32:63], v{{[0-9]+}}, v{{[0-9]+}}, a[0:31]
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select-gfx940.ll b/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select-gfx940.ll
index 59b13c02f92fb..af9bd40da70e3 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select-gfx940.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select-gfx940.ll
@@ -1,33 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GCN %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefix=GCN %s
 
-declare <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64, i64, <4 x i32>, i32, i32, i32)
-declare <16 x i32> @llvm.amdgcn.mfma.i32.32x32x16.i8(i64, i64, <16 x i32>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float>, <2 x float>, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float>, <2 x float>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.bf8(i64, i64, <4 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.fp8(i64, i64, <4 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.bf8(i64, i64, <4 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.fp8(i64, i64, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.bf8(i64, i64, <16 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.fp8(i64, i64, <16 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.bf8(i64, i64, <16 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64, i64, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.f16(<4 x half>, <8 x half>, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.f16(<4 x half>, <8 x half>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.bf16(<4 x i16>, <8 x i16>, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.bf16(<4 x i16>, <8 x i16>, <16 x float>, i32, i32, i32)
-declare <4 x i32> @llvm.amdgcn.smfmac.i32.16x16x64.i8(<2 x i32>, <4 x i32>, <4 x i32>, i32, i32, i32)
-declare <16 x i32> @llvm.amdgcn.smfmac.i32.32x32x32.i8(<2 x i32>, <4 x i32>, <16 x i32>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf8.bf8(<2 x i32>, <4 x i32>, <4 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf8.fp8(<2 x i32>, <4 x i32>, <4 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.fp8.bf8(<2 x i32>, <4 x i32>, <4 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.fp8.fp8(<2 x i32>, <4 x i32>, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf8.bf8(<2 x i32>, <4 x i32>, <16 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf8.fp8(<2 x i32>, <4 x i32>, <16 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.bf8(<2 x i32>, <4 x i32>, <16 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.fp8(<2 x i32>, <4 x i32>, <16 x float>, i32, i32, i32)
-
 ; GCN-LABEL: {{^}}test_mfma_i32_16x16x32i8:
 ; GCN: v_mfma_i32_16x16x32_i8 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
 define amdgpu_kernel void @test_mfma_i32_16x16x32i8(ptr addrspace(1) %arg) {
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select.ll b/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select.ll
index 06775f5d3f92b..2ce9c6aa4c948 100644
--- a/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/mfma-vgpr-cd-select.ll
@@ -3,20 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN %s
 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefixes=GCN %s
 
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float, float, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float, float, <4 x float>, i32, i32, i32)
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half>, <4 x half>, <32 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half>, <4 x half>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half>, <4 x half>, <4 x float>, i32, i32, i32)
-declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half>, <4 x half>, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half>, <4 x half>, <4 x float>, i32, i32, i32)
-declare <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32, i32, <32 x i32>, i32, i32, i32)
-declare <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32, i32, <16 x i32>, i32, i32, i32)
-declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)
-
 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32:
 ; GCN: v_mfma_f32_32x32x1{{.*}} v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9:]+}}, v[{{[0-9]+:[0-9]+}}]
 define amdgpu_kernel void @test_mfma_f32_32x32x1f32(ptr addrspace(1) %arg) {
diff --git a/llvm/test/CodeGen/AMDGPU/min.ll b/llvm/test/CodeGen/AMDGPU/min.ll
index 9dafa27ece86f..5086537f0b2b2 100644
--- a/llvm/test/CodeGen/AMDGPU/min.ll
+++ b/llvm/test/CodeGen/AMDGPU/min.ll
@@ -4306,7 +4306,5 @@ define amdgpu_kernel void @v_test_imin_ule_v2i16(ptr addrspace(1) %out, ptr addr
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/min3.ll b/llvm/test/CodeGen/AMDGPU/min3.ll
index 0e25540f5dd2e..0da2f80334773 100644
--- a/llvm/test/CodeGen/AMDGPU/min3.ll
+++ b/llvm/test/CodeGen/AMDGPU/min3.ll
@@ -330,7 +330,5 @@ define amdgpu_kernel void @v_test_umin3_ult_i64(ptr addrspace(1) %out, ptr addrs
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/minimummaximum.ll b/llvm/test/CodeGen/AMDGPU/minimummaximum.ll
index 062955d722068..7de5aeec70fec 100644
--- a/llvm/test/CodeGen/AMDGPU/minimummaximum.ll
+++ b/llvm/test/CodeGen/AMDGPU/minimummaximum.ll
@@ -137,8 +137,3 @@ define amdgpu_ps void @s_test_minmax_f16(half inreg %a, half inreg %b, half inre
   store half %sminmax, ptr addrspace(1) %out
   ret void
 }
-
-declare half @llvm.minimum.f16(half, half)
-declare half @llvm.maximum.f16(half, half)
-declare float @llvm.minimum.f32(float, float)
-declare float @llvm.maximum.f32(float, float)
diff --git a/llvm/test/CodeGen/AMDGPU/minmax.ll b/llvm/test/CodeGen/AMDGPU/minmax.ll
index b5b9997f297c2..b7b4025ff12a7 100644
--- a/llvm/test/CodeGen/AMDGPU/minmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/minmax.ll
@@ -393,14 +393,5 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0
   store half %tmp3, ptr addrspace(1) %arg
   ret void
 }
-
-declare i32 @llvm.smin.i32(i32, i32)
-declare i32 @llvm.smax.i32(i32, i32)
-declare i32 @llvm.umin.i32(i32, i32)
-declare i32 @llvm.umax.i32(i32, i32)
-declare half @llvm.minnum.f16(half, half)
-declare half @llvm.maxnum.f16(half, half)
-declare float @llvm.minnum.f32(float, float)
-declare float @llvm.maxnum.f32(float, float)
 attributes #0 = { nounwind "unsafe-fp-math"="false" "no-nans-fp-math"="true" }
 
diff --git a/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll b/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll
index fe4c2e4b488b8..16da89af6f090 100644
--- a/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll
+++ b/llvm/test/CodeGen/AMDGPU/mixed-wave32-wave64.ll
@@ -31,9 +31,6 @@ define amdgpu_gs void @_amdgpu_gs_main() #4 {
   ret void
 }
 
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
-declare float @llvm.amdgcn.image.sample.2d.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
-
 attributes #0 = { "amdgpu-max-work-group-size"="128" "target-features"=",+wavefrontsize32" }
 attributes #1 = { "target-features"=",+wavefrontsize64" }
 attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/mixed_wave32_wave64.ll b/llvm/test/CodeGen/AMDGPU/mixed_wave32_wave64.ll
index 5977566e2d00b..2d761f994c77d 100644
--- a/llvm/test/CodeGen/AMDGPU/mixed_wave32_wave64.ll
+++ b/llvm/test/CodeGen/AMDGPU/mixed_wave32_wave64.ll
@@ -95,8 +95,6 @@ endif:
   ret void
 }
 
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #2
-
 attributes #0 = { nounwind "InitialPSInputAddr"="2" "target-features"="+wavefrontsize64" }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { nounwind "target-features"="+wavefrontsize32" }
diff --git a/llvm/test/CodeGen/AMDGPU/mmo-target-flags-folding.ll b/llvm/test/CodeGen/AMDGPU/mmo-target-flags-folding.ll
index 9cf0e0623bef2..401cf01214b53 100644
--- a/llvm/test/CodeGen/AMDGPU/mmo-target-flags-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/mmo-target-flags-folding.ll
@@ -19,6 +19,4 @@ entry:
   unreachable
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 !0 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw-system.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw-system.ll
index 4332d9daeaaf5..2d9d4bd21f363 100644
--- a/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw-system.ll
@@ -8,8 +8,6 @@
 ; Check that moving the pointer out of the resource descriptor to
 ; vaddr works for atomics.
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 define amdgpu_kernel void @atomic_max_i32(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %x, i32 %y) #0 {
 ; GCN-LABEL: atomic_max_i32:
 ; GCN:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll
index 63688ebeab9d0..f03a9425e068a 100644
--- a/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll
+++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-atomicrmw.ll
@@ -8,8 +8,6 @@
 ; Check that moving the pointer out of the resource descriptor to
 ; vaddr works for atomics.
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 define amdgpu_kernel void @atomic_max_i32(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %x, i32 %y) #0 {
 ; GCN-LABEL: atomic_max_i32:
 ; GCN:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-ctlz-cttz.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-ctlz-cttz.ll
index 9d6e0927b0dfd..4f4f5836345ab 100644
--- a/llvm/test/CodeGen/AMDGPU/move-to-valu-ctlz-cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-ctlz-cttz.ll
@@ -2,9 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
 
-declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
-declare i64 @llvm.cttz.i64(i64, i1) nounwind readnone
-
 define amdgpu_kernel void @ctlz_i64_poison(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
 ; GFX9-LABEL: ctlz_i64_poison:
 ; GFX9:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans.ll
index 1cd9afef13b5e..6d4d3a0740f63 100644
--- a/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans.ll
+++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-pseudo-scalar-trans.ll
@@ -205,14 +205,3 @@ define amdgpu_kernel void @sqrt_f16(ptr addrspace(1) %ptr) {
   store half %res, ptr addrspace(1) %ptr
   ret void
 }
-
-declare float @llvm.amdgcn.exp2.f32(float)
-declare half @llvm.amdgcn.exp2.f16(half)
-declare float @llvm.amdgcn.log.f32(float)
-declare half @llvm.amdgcn.log.f16(half)
-declare float @llvm.amdgcn.rcp.f32(float)
-declare half @llvm.amdgcn.rcp.f16(half)
-declare float @llvm.amdgcn.rsq.f32(float)
-declare half @llvm.amdgcn.rsq.f16(half)
-declare float @llvm.amdgcn.sqrt.f32(float)
-declare half @llvm.amdgcn.sqrt.f16(half)
diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-vimage-vsample.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-vimage-vsample.ll
index 003c3ea7fce10..d1a2851de3164 100644
--- a/llvm/test/CodeGen/AMDGPU/move-to-valu-vimage-vsample.ll
+++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-vimage-vsample.ll
@@ -383,6 +383,3 @@ main_body:
   %v = call float @llvm.amdgcn.image.sample.1d.f32.f32(i32 1, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
   ret float %v
 }
-
-declare float @llvm.amdgcn.image.load.2d.f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
-declare float @llvm.amdgcn.image.sample.1d.f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
index b5ee6689f8dc3..da8d74b1b7547 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll
@@ -1286,8 +1286,5 @@ bb2:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
index 162c47f879465..321d4c7526b88 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
@@ -1363,8 +1363,5 @@ bb2:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr-non-ptr-intrinsics.ll
index c0c93f7badde2..0584cff3f4b89 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr-non-ptr-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr-non-ptr-intrinsics.ll
@@ -46,8 +46,5 @@ main_body:
   ret float %tmp7
 }
 
-declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32 immarg) #1
-declare float @llvm.amdgcn.raw.buffer.load.format.f32(<4 x i32>, i32, i32, i32 immarg) #1
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll b/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll
index 202ce009ef69a..1d9dc8da6ad77 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf-shader-vgpr.ll
@@ -43,8 +43,5 @@ main_body:
   ret float %tmp7
 }
 
-declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #1
-declare float @llvm.amdgcn.raw.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32 immarg) #1
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf.ll b/llvm/test/CodeGen/AMDGPU/mubuf.ll
index dd9f5fa7fcb15..8647287ad4ecb 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf.ll
@@ -172,11 +172,6 @@ define amdgpu_kernel void @store_vgpr_ptr(ptr addrspace(1) %out) {
   ret void
 }
 
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare void @llvm.amdgcn.raw.ptr.tbuffer.store.i32(i32, ptr addrspace(8), i32, i32, i32 immarg, i32 immarg) #2
-declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32 immarg) #3
-
 attributes #0 = { nounwind readonly }
 attributes #1 = { nounwind readnone speculatable willreturn }
 attributes #2 = { nounwind willreturn writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/mul.ll b/llvm/test/CodeGen/AMDGPU/mul.ll
index b4272049f36a4..205b9c2cfbb59 100644
--- a/llvm/test/CodeGen/AMDGPU/mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul.ll
@@ -3358,7 +3358,5 @@ define i32 @mul_pow2_plus_1(i32 %val) {
   ret i32 %mul
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone}
diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
index 1e9994dd8e6ef..7b0e84bb25b3e 100644
--- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
@@ -261,7 +261,6 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
 }
 
 declare void @foo(i32) #2
-declare float @llvm.fmuladd.f32(float, float, float) #1
 
 attributes #0 = { nounwind willreturn "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll b/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
index 3a16c88f32cc3..1e2249d5b2c9a 100644
--- a/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_uint24-amdgcn.ll
@@ -3,9 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
-
 define amdgpu_kernel void @test_umul24_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
 ; SI-LABEL: test_umul24_i32:
 ; SI:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
index 4eefff504f19e..fbee9e991244b 100644
--- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
+++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
@@ -747,8 +747,5 @@ bb27:                                             ; preds = %.entry
   ret void
 }
 
-declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index 16de2c0c6de08..cb840a47a2555 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -246,7 +246,5 @@ bb9:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
index f6e3509eb029b..e71e0317aef08 100644
--- a/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
+++ b/llvm/test/CodeGen/AMDGPU/nested-loop-conditions.ll
@@ -339,7 +339,5 @@ bb31:                                             ; preds = %bb31.loopexit, %bb1
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
index d62f045674ace..58dceb864e96d 100644
--- a/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
+++ b/llvm/test/CodeGen/AMDGPU/no-dup-inst-prefetch.ll
@@ -119,10 +119,8 @@ loop0_merge:                                      ; preds = %branch2_merge, %bb
 }
 
 ; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.fma.f32(float, float, float) #0
 
 ; Function Attrs: nounwind readonly willreturn
-declare float @llvm.amdgcn.image.sample.lz.3d.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
 
 attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
 attributes #1 = { nounwind readonly willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/no-shrink-extloads.ll b/llvm/test/CodeGen/AMDGPU/no-shrink-extloads.ll
index c6b1fe81983aa..4b28dc841ac25 100644
--- a/llvm/test/CodeGen/AMDGPU/no-shrink-extloads.ll
+++ b/llvm/test/CodeGen/AMDGPU/no-shrink-extloads.ll
@@ -1,7 +1,5 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 ; Make sure we don't turn the 32-bit argument load into a 16-bit
 ; load. There aren't extending scalar lods, so that would require
 ; using a buffer_load instruction.
diff --git a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
index 7152ed4eb8d46..c0b15ec751c63 100644
--- a/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
@@ -601,6 +601,3 @@ entry:
   store i32 %ld, ptr addrspace(1) %out, align 4
   ret void
 }
-
-declare void @llvm.amdgcn.s.barrier()
-declare void @llvm.amdgcn.wave.barrier()
diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
index 125e6bc0f787f..0b03c43c93be4 100644
--- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
@@ -399,8 +399,6 @@ bb.1:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
 
diff --git a/llvm/test/CodeGen/AMDGPU/nsa-reassign.ll b/llvm/test/CodeGen/AMDGPU/nsa-reassign.ll
index 6df702d61602a..de9edf635d4ee 100644
--- a/llvm/test/CodeGen/AMDGPU/nsa-reassign.ll
+++ b/llvm/test/CodeGen/AMDGPU/nsa-reassign.ll
@@ -96,7 +96,4 @@ main_body:
   ret float %v
 }
 
-declare float @llvm.amdgcn.image.sample.3d.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare float @llvm.amdgcn.image.sample.c.l.3d.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
-
 attributes #0 = {"amdgpu-num-vgpr"="10"}
diff --git a/llvm/test/CodeGen/AMDGPU/omod.ll b/llvm/test/CodeGen/AMDGPU/omod.ll
index 769d035858ca8..fcc584dd4dbef 100644
--- a/llvm/test/CodeGen/AMDGPU/omod.ll
+++ b/llvm/test/CodeGen/AMDGPU/omod.ll
@@ -1287,20 +1287,6 @@ define amdgpu_ps void @v_omod_mac_to_mad(float %b, float %a) #0 {
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.floor.f32(float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
-declare double @llvm.fabs.f64(double) #1
-declare double @llvm.minnum.f64(double, double) #1
-declare double @llvm.maxnum.f64(double, double) #1
-declare half @llvm.fabs.f16(half) #1
-declare half @llvm.minnum.f16(half, half) #1
-declare half @llvm.maxnum.f16(half, half) #1
-declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
-
 attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" "no-signed-zeros-fp-math"="true" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "denormal-fp-math-f32"="ieee,ieee" "no-signed-zeros-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/operand-folding.ll b/llvm/test/CodeGen/AMDGPU/operand-folding.ll
index 1e3299a023736..d83ecea305fe8 100644
--- a/llvm/test/CodeGen/AMDGPU/operand-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/operand-folding.ll
@@ -148,7 +148,5 @@ if.else:
   unreachable
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll b/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll
index e21b93a386c3e..40ff256d87b48 100644
--- a/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll
@@ -630,7 +630,5 @@ define amdgpu_kernel void @v_pack_v2f16_inline_imm_hi(ptr addrspace(1) %in0) #0
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll b/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll
index 4b21493bd7ca6..1fdb655b50d61 100644
--- a/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll
@@ -555,8 +555,6 @@ define amdgpu_kernel void @v_pack_v2i16_inline_imm_hi(ptr addrspace(1) %in0) #0
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 
diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
index 6b7eff316fe95..5e246b77b4abb 100644
--- a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
+++ b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
@@ -603,8 +603,3 @@ define amdgpu_kernel void @fneg_v2f32_scalar(ptr addrspace(1) %a, <2 x float> %x
   store <2 x float> %fneg, ptr addrspace(1) %a, align 8
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
-declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
-declare <32 x float> @llvm.fma.v32f32(<32 x float>, <32 x float>, <32 x float>)
diff --git a/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll b/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll
index 4d6adc7cc9417..1a41aeb10b78a 100644
--- a/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll
+++ b/llvm/test/CodeGen/AMDGPU/packed-op-sel.ll
@@ -685,7 +685,5 @@ bb:
   ret void
 }
 
-declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
index cabc11037017e..df457e92dfb84 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll
@@ -151,10 +151,8 @@ define dllexport amdgpu_ps void @ps_shader() #1 {
 declare ptr addrspace(7) @lgc.buffer.desc.to.ptr(<4 x i32>) #1
 
 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare i64 @llvm.amdgcn.s.getpc() #2
 
 ; Function Attrs: nocallback nofree nosync nounwind willreturn memory(write)
-declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg) #3
 
 attributes #0 = { nounwind memory(readwrite) "amdgpu-flat-work-group-size"="1024,1024" "amdgpu-memory-bound"="false" "amdgpu-unroll-threshold"="700" "amdgpu-wave-limiter"="false" "amdgpu-work-group-info-arg-no"="4" "denormal-fp-math-f32"="preserve-sign" "target-features"=",+wavefrontsize64,+cumode" }
 
diff --git a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
index 2e9f09ad41813..f016b7f42a993 100644
--- a/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/pal-simple-indirect-call.ll
@@ -64,7 +64,6 @@ define amdgpu_cs void @test_simple_indirect_call() {
 }
 
 ; Function Attrs: nounwind readnone speculatable willreturn
-declare i64 @llvm.amdgcn.s.getpc() #0
 
 attributes #0 = { nounwind readnone speculatable willreturn }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
index 45fbaaabc65b5..c3cf80970b0ec 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -109,6 +109,4 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
   ret void
 }
 
-declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)
-
 attributes #0 = { nounwind "amdgpu-num-vgpr"="5" }
diff --git a/llvm/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll b/llvm/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll
index 70f4f9688e986..8ad62b3eece5c 100644
--- a/llvm/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll
+++ b/llvm/test/CodeGen/AMDGPU/partially-dead-super-register-immediate.ll
@@ -8,8 +8,6 @@
 ; During live interval construction, the first sub register def is
 ; incorrectly marked as dead.
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 define amdgpu_kernel void @dead_def_subregister(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
   %in.gep = getelementptr i64, ptr addrspace(1) %in, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/perfhint.ll b/llvm/test/CodeGen/AMDGPU/perfhint.ll
index 9cf7bd4edd20d..c6185640fa70f 100644
--- a/llvm/test/CodeGen/AMDGPU/perfhint.ll
+++ b/llvm/test/CodeGen/AMDGPU/perfhint.ll
@@ -167,5 +167,3 @@ bb1:                                              ; preds = %bb1, %bb
 bb2:                                              ; preds = %bb1
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/permlane-op-sel.ll b/llvm/test/CodeGen/AMDGPU/permlane-op-sel.ll
index 031a46271f2c0..6063f86478434 100644
--- a/llvm/test/CodeGen/AMDGPU/permlane-op-sel.ll
+++ b/llvm/test/CodeGen/AMDGPU/permlane-op-sel.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx1030 -d - | FileCheck -check-prefix=OBJ %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefix=ASM %s
 
-declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1, i1)
-
 ; OBJ-LABEL: <permlane_op_sel>:
 ; OBJ: v_permlane16_b32 v0, v0, s7, s0 op_sel:[1,0]
 
diff --git a/llvm/test/CodeGen/AMDGPU/permlane16_opsel.ll b/llvm/test/CodeGen/AMDGPU/permlane16_opsel.ll
index caa7fb8df1990..327d75957ebb8 100644
--- a/llvm/test/CodeGen/AMDGPU/permlane16_opsel.ll
+++ b/llvm/test/CodeGen/AMDGPU/permlane16_opsel.ll
@@ -3,11 +3,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 --stop-after=amdgpu-isel -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG,SDAG-GFX11 %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 --stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL %s
 
-declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1, i1)
-declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1, i1)
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.amdgcn.workitem.id.y()
-
 define amdgpu_kernel void @v_permlane16_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) {
   ; SDAG: V_PERMLANE16_B32_e64 0, {{%[0-9]+}}, 0, killed {{%[0-9]+}}, 0, killed {{%[0-9]+}}, {{%[0-9]+}}, 0, implicit $exec
   ; GISEL: V_PERMLANE16_B32_e64 0, {{%[0-9]+}}, 0, {{%[0-9]+}}, 0, {{%[0-9]+}}, {{%[0-9]+}}, 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/permlane16_var-op-sel.ll b/llvm/test/CodeGen/AMDGPU/permlane16_var-op-sel.ll
index acfc0ac086ce8..8cb8050c9bc83 100644
--- a/llvm/test/CodeGen/AMDGPU/permlane16_var-op-sel.ll
+++ b/llvm/test/CodeGen/AMDGPU/permlane16_var-op-sel.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -filetype=obj < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx1200 -d - | FileCheck -check-prefix=OBJ %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -show-mc-encoding < %s | FileCheck -check-prefix=ASM %s
 
-declare i32 @llvm.amdgcn.permlane16.var(i32, i32, i32, i1, i1)
-
 ; OBJ-LABEL: <permlane_op_sel>:
 ; OBJ: v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
 
diff --git a/llvm/test/CodeGen/AMDGPU/permute.ll b/llvm/test/CodeGen/AMDGPU/permute.ll
index 6cab2b1839307..e5259060358ef 100644
--- a/llvm/test/CodeGen/AMDGPU/permute.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute.ll
@@ -393,5 +393,3 @@ bb:
   store i32 %v, ptr addrspace(1) %arg, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
index 8ac332197215f..d5397492e9ddb 100644
--- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll
@@ -665,9 +665,6 @@ bb:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
-
 ; Not combined to perm due to non-vectorized use, non-divergent
 define hidden void @add(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 %elt, ptr addrspace(1) %out0) {
 ; GFX10-LABEL: add:
@@ -2792,8 +2789,6 @@ define hidden void @extract3744(ptr addrspace(1) %in0, ptr addrspace(1) %in1, pt
   ret void
 }
 
-declare i32 @llvm.amdgcn.perm(i32, i32, i32)
-
 define hidden void @extract_perm_3744(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr addrspace(1) %out0) {
 ; GFX10-LABEL: extract_perm_3744:
 ; GFX10:       ; %bb.0:
@@ -2881,9 +2876,6 @@ define hidden void @extract1347_v2i16(ptr addrspace(1) %in0, ptr addrspace(1) %i
   ret void
 }
 
-
-declare i16 @llvm.fshr.i16(i16, i16, i16)
-
 define hidden void @fshri16_8(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr addrspace(1) %out0) {
 ; GFX10-LABEL: fshri16_8:
 ; GFX10:       ; %bb.0:
@@ -3084,8 +3076,6 @@ define hidden void @fshri16_88(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr
   ret void
 }
 
-declare i16 @llvm.fshl.i16(i16, i16, i16)
-
 define hidden void @fshli16_1347(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr addrspace(1) %out0) {
 ; GFX10-LABEL: fshli16_1347:
 ; GFX10:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll b/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll
index 9a6cfb72f28f1..df136f1f5a0a2 100644
--- a/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll
+++ b/llvm/test/CodeGen/AMDGPU/pk_max_f16_literal.ll
@@ -137,6 +137,3 @@ bb:
   store <2 x half> %tmp4, ptr addrspace(1) %tmp2, align 4
   ret void
 }
-
-declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll b/llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll
index 4794c29621525..ad49aadd80634 100644
--- a/llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll
+++ b/llvm/test/CodeGen/AMDGPU/post-ra-soft-clause-dbg-info.ll
@@ -32,9 +32,6 @@ define amdgpu_kernel void @dbg_clause(ptr addrspace(1) %out, ptr addrspace(1) %a
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-
 attributes #0 = { nounwind readnone speculatable willreturn }
 attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
 
diff --git a/llvm/test/CodeGen/AMDGPU/predicate-dp4.ll b/llvm/test/CodeGen/AMDGPU/predicate-dp4.ll
index 9e2778bb3e71a..23920cd6303c0 100644
--- a/llvm/test/CodeGen/AMDGPU/predicate-dp4.ll
+++ b/llvm/test/CodeGen/AMDGPU/predicate-dp4.ll
@@ -20,7 +20,4 @@ ENDIF:                                            ; preds = %IF, %main_body
   call void @llvm.r600.store.swizzle(<4 x float> %6, i32 0, i32 0)
   ret void
 }
-
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
 attributes #1 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll b/llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll
index e7488e059ee9d..6eff60d98834e 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-kernargs-inreg-hints.ll
@@ -258,6 +258,5 @@ define amdgpu_kernel void @test_preload_hint_kernel_incompatible_attributes(ptr
 }
 
 declare void @func(ptr) #0
-declare void @llvm.amdgcn.set.prio(i16)
 
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
index 4a1287c56ea8e..758e563a11166 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll
@@ -777,7 +777,3 @@ define i32 @zext_fptrunc_fma_f16(float %x, float %y, float %z) {
   %zext = zext i16 %cast to i32
   ret i32 %zext
 }
-
-declare half @llvm.amdgcn.div.fixup.f16(half, half, half)
-declare half @llvm.fma.f16(half, half, half)
-declare float @llvm.fma.f32(float, float, float)
diff --git a/llvm/test/CodeGen/AMDGPU/preserve-user-waitcnt.ll b/llvm/test/CodeGen/AMDGPU/preserve-user-waitcnt.ll
index b4850930b1d22..084c67f7cf1d7 100644
--- a/llvm/test/CodeGen/AMDGPU/preserve-user-waitcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/preserve-user-waitcnt.ll
@@ -49,5 +49,3 @@ define void @test_waitcnt_builtin_non_kernel() {
   call void @llvm.amdgcn.s.waitcnt(i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.s.waitcnt(i32)
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
index 462ab38b4cd58..7da0e908d50fc 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
@@ -3,8 +3,6 @@
 ; RUN: opt -S -mtriple=r600-unknown-unknown -mcpu=redwood -passes=amdgpu-promote-alloca -disable-promote-alloca-to-vector < %s | FileCheck -check-prefix=OPT %s
 target datalayout = "A5"
 
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-
 ; FUNC-LABEL: {{^}}mova_same_clause:
 
 ; R600: LDS_WRITE
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
index bd9234d864b3d..d74d427714d1d 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
@@ -295,11 +295,6 @@ define amdgpu_vs void @promote_memcpy_inline_aggr() #0 {
   ret void
 }
 
-declare void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
-declare void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
-declare void @llvm.memcpy.inline.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
-declare void @llvm.memmove.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
-
 @tmp_g = external addrspace(1) global { [4 x double], <2 x double>, <3 x double>, <4 x double> }
 @frag_color = external addrspace(1) global <4 x float>
 
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
index ad2db7485808d..be71ec1748638 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-calling-conv.ll
@@ -90,8 +90,6 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,64" }
 attributes #1 = { nounwind readnone }
 
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
index a585901fc377c..5c7a370e4b60d 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-invariant-markers.ll
@@ -1,10 +1,6 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn-- -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 target datalayout = "A5"
 
-declare ptr @llvm.invariant.start.p5(i64, ptr addrspace(5) nocapture) #0
-declare void @llvm.invariant.end.p5(ptr, i64, ptr addrspace(5) nocapture) #0
-declare ptr addrspace(5) @llvm.launder.invariant.group.p5(ptr addrspace(5)) #1
-
 ; GCN-LABEL: {{^}}use_invariant_promotable_lds:
 ; GCN: buffer_load_dword
 ; GCN: ds_write_b32
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-lifetime.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-lifetime.ll
index 2230e12a30f59..871d2700ae488 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-lifetime.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-lifetime.ll
@@ -2,9 +2,6 @@
 
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
 
-declare void @llvm.lifetime.start.p5(i64, ptr addrspace(5) nocapture) #0
-declare void @llvm.lifetime.end.p5(i64, ptr addrspace(5) nocapture) #0
-
 ; OPT-LABEL: @use_lifetime_promotable_lds(
 ; OPT-NOT: alloca i32
 ; OPT-NOT: llvm.lifetime
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
index aabd5df956837..e9c4dd2aa315c 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
@@ -1,17 +1,5 @@
 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -passes=amdgpu-promote-alloca < %s | FileCheck --enable-var-scope %s
 
-declare void @llvm.memcpy.p5.p1.i32(ptr addrspace(5) nocapture, ptr addrspace(1) nocapture, i32, i1) #0
-declare void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) nocapture, ptr addrspace(5) nocapture, i32, i1) #0
-declare void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture, i64, i1) #0
-
-declare void @llvm.memmove.p5.p1.i32(ptr addrspace(5) nocapture, ptr addrspace(1) nocapture, i32, i1) #0
-declare void @llvm.memmove.p1.p5.i32(ptr addrspace(1) nocapture, ptr addrspace(5) nocapture, i32, i1) #0
-declare void @llvm.memmove.p5.p5.i64(ptr addrspace(5) nocapture, ptr addrspace(5) nocapture, i64, i1) #0
-
-declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture, i8, i32, i1) #0
-
-declare i32 @llvm.objectsize.i32.p5(ptr addrspace(5), i1, i1, i1) #1
-
 ; CHECK-LABEL: @promote_with_memcpy(
 ; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds [64 x [17 x i32]], ptr addrspace(3) @promote_with_memcpy.alloca, i32 0, i32 %{{[0-9]+}}
 ; CHECK: call void @llvm.memcpy.p3.p1.i32(ptr addrspace(3) align 4 [[GEP]], ptr addrspace(1) align 4 %in, i32 68, i1 false)
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll
index f1e2737b370ef..48ad17428e09d 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-memset.ll
@@ -137,5 +137,3 @@ define amdgpu_kernel void @memset_array_of_vec_ptr_alloca(ptr %out) {
   store i64 %load, ptr %out
   ret void
 }
-
-declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture writeonly, i8, i64, i1 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-unhandled-intrinsic.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-unhandled-intrinsic.ll
index 1fe1904592a7d..f1b75770085c2 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-unhandled-intrinsic.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-unhandled-intrinsic.ll
@@ -3,8 +3,6 @@
 ; This is just an arbitrary intrinisic that shouldn't be
 ; handled to ensure it doesn't crash.
 
-declare void @llvm.stackrestore.p5(ptr addrspace(5)) #2
-
 ; CHECK-LABEL: @try_promote_unhandled_intrinsic(
 ; CHECK: alloca
 ; CHECK: call void @llvm.stackrestore.p5(ptr addrspace(5) %tmp)
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
index 99a4bb83c0c44..eba47cf2795f5 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
@@ -180,6 +180,3 @@ entry:
   %tmp1 = load i64, ptr addrspace(5) %private_iptr, align 8
   ret i64 %tmp1
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.amdgcn.workitem.id.y()
diff --git a/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll b/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll
index c9686e661fe14..8d064a267d3bd 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-kernel-arguments.ll
@@ -472,6 +472,3 @@ entry:
   store float 0.000000e+00, ptr %p3, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare void @llvm.amdgcn.s.barrier()
diff --git a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
index bbbbc0dc0f28d..214bc1b6345d4 100644
--- a/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
+++ b/llvm/test/CodeGen/AMDGPU/pseudo-scalar-transcendental.ll
@@ -338,20 +338,3 @@ define amdgpu_cs half @srcmods_neg_f16(half inreg %src) {
   %result = call half @llvm.log2.f16(half %neg)
   ret half %result
 }
-
-declare half @llvm.exp2.f16(half)
-declare float @llvm.exp2.f32(float)
-declare half @llvm.amdgcn.exp2.f16(half)
-declare float @llvm.amdgcn.exp2.f32(float)
-declare half @llvm.log2.f16(half)
-declare float @llvm.log2.f32(float)
-declare half @llvm.amdgcn.log.f16(half)
-declare float @llvm.amdgcn.log.f32(float)
-declare half @llvm.amdgcn.rcp.f16(half)
-declare float @llvm.amdgcn.rcp.f32(float)
-declare half @llvm.sqrt.f16(half)
-declare float @llvm.sqrt.f32(float)
-declare half @llvm.amdgcn.sqrt.f16(half)
-declare float @llvm.amdgcn.sqrt.f32(float)
-declare half @llvm.fabs.f16(half)
-declare float @llvm.fabs.f32(float)
diff --git a/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll
index e29f09dcac024..e0a08d408abc5 100644
--- a/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptr-arg-dbg-value.ll
@@ -87,8 +87,6 @@ define hidden void @ptr_arg_in_memory(<32 x i32>, ptr %arg3) #0 !dbg !31 {
   ret void, !dbg !36
 }
 
-declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
-
 attributes #0 = { nounwind }
 
 !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/ptr-buffer-alias-scheduling.ll b/llvm/test/CodeGen/AMDGPU/ptr-buffer-alias-scheduling.ll
index 5bb260c09c9dd..a7ddfb7a35eae 100644
--- a/llvm/test/CodeGen/AMDGPU/ptr-buffer-alias-scheduling.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptr-buffer-alias-scheduling.ll
@@ -206,9 +206,3 @@ define amdgpu_kernel void @independent_offsets(ptr addrspace(8) %a) {
 
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
-
-declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
-declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr readnone nocapture, i16, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/ptrmask.ll b/llvm/test/CodeGen/AMDGPU/ptrmask.ll
index 7062270678933..3acc9d789ecf4 100644
--- a/llvm/test/CodeGen/AMDGPU/ptrmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptrmask.ll
@@ -65,9 +65,6 @@ define amdgpu_ps ptr addrspace(3) @s_ptrmask_local_variable_i32(ptr addrspace(3)
   ret ptr addrspace(3) %masked
 }
 
-declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32) #0
-declare ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1), i64) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX10: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/pv-packing.ll b/llvm/test/CodeGen/AMDGPU/pv-packing.ll
index 1d6c751278c68..c81e9a5e15694 100644
--- a/llvm/test/CodeGen/AMDGPU/pv-packing.ll
+++ b/llvm/test/CodeGen/AMDGPU/pv-packing.ll
@@ -53,9 +53,5 @@ main_body:
 }
 
 ; Function Attrs: readnone
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
-
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
 
 attributes #1 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/pv.ll b/llvm/test/CodeGen/AMDGPU/pv.ll
index 856457aa58cbf..c6b7be6976935 100644
--- a/llvm/test/CodeGen/AMDGPU/pv.ll
+++ b/llvm/test/CodeGen/AMDGPU/pv.ll
@@ -224,13 +224,5 @@ main_body:
   ret void
 }
 
-declare float @llvm.minnum.f32(float, float) #0
-declare float @llvm.maxnum.f32(float, float) #0
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #0
-declare float @llvm.fabs.f32(float) #0
-declare float @llvm.r600.recipsqrt.clamped.f32(float) #0
-declare float @llvm.pow.f32(float, float) #0
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) #1
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/r600-encoding.ll b/llvm/test/CodeGen/AMDGPU/r600-encoding.ll
index 2580c58f56057..fb387bc638867 100644
--- a/llvm/test/CodeGen/AMDGPU/r600-encoding.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600-encoding.ll
@@ -19,5 +19,3 @@ entry:
   call void @llvm.r600.store.swizzle(<4 x float> %vec, i32 0, i32 0)
   ret void
 }
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll b/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll
index 21a7050c2c320..2335bc885c58c 100644
--- a/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll
@@ -167,5 +167,3 @@ main_body:
   call void @llvm.r600.store.swizzle(<4 x float> %113, i32 6, i32 2)
   ret void
 }
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll b/llvm/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll
index 54cc43ce36b9f..f2b1817088747 100644
--- a/llvm/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600-infinite-loop-bug-while-reorganizing-vector.ll
@@ -45,14 +45,10 @@ main_body:
 }
 
 ; Function Attrs: readnone
-declare <4 x float> @llvm.r600.cube(<4 x float>) #0
 
 ; Function Attrs: readnone
 declare float @fabsf(float) #0
 
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-
 ; Function Attrs: readnone
-declare <4 x float> @llvm.r600.texc(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
 
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/r600.add.ll b/llvm/test/CodeGen/AMDGPU/r600.add.ll
index efa3b80faccc4..2d24a83103abf 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.add.ll
@@ -161,7 +161,5 @@ endif:
   ret void
 }
 
-declare i32 @llvm.r600.read.tidig.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll b/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll
index 03e3712189728..82cdc944b465f 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=r600 -mcpu=cypress < %s | FileCheck %s -check-prefix=R600 -check-prefix=FUNC
 
-declare i32 @llvm.r600.read.tidig.x() nounwind readnone
-
 
 ; Make sure we don't overwrite workitem information with private memory
 
diff --git a/llvm/test/CodeGen/AMDGPU/r600.sub.ll b/llvm/test/CodeGen/AMDGPU/r600.sub.ll
index 17b1c4afa08de..7186a01cd9770 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.sub.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s
 
-declare i32 @llvm.r600.read.tidig.x() readnone
-
 ; FUNC-LABEL: {{^}}s_sub_i32:
 define amdgpu_kernel void @s_sub_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
   %result = sub i32 %a, %b
diff --git a/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
index 52b0eaf23ef2b..d5196f030ab63 100644
--- a/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
@@ -84,14 +84,4 @@ define amdgpu_kernel void @test_implicit_dyn(ptr addrspace(1) %out, i32 %in) #1
   ret void
 }
 
-declare ptr addrspace(7) @llvm.r600.implicitarg.ptr() #0
-
-declare i32 @llvm.r600.read.tgid.x() #0
-declare i32 @llvm.r600.read.tgid.y() #0
-declare i32 @llvm.r600.read.tgid.z() #0
-
-declare i32 @llvm.r600.read.tidig.x() #0
-declare i32 @llvm.r600.read.tidig.y() #0
-declare i32 @llvm.r600.read.tidig.z() #0
-
 attributes #0 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/r600cfg.ll b/llvm/test/CodeGen/AMDGPU/r600cfg.ll
index 663b13a913ee2..10c4ea2672cc4 100644
--- a/llvm/test/CodeGen/AMDGPU/r600cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/r600cfg.ll
@@ -111,7 +111,3 @@ ENDIF48:                                          ; preds = %LOOP47
   %70 = bitcast i32 %69 to float
   br label %LOOP47
 }
-
-declare void @llvm.r600.store.stream.output(<4 x float>, i32, i32, i32)
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll b/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
index 74bad5ea3edce..a6b9150375175 100644
--- a/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
+++ b/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
@@ -1408,9 +1408,6 @@ define amdgpu_kernel void @s_div_arcp_neg_k_x_pat_f32_daz(ptr addrspace(1) %out)
   ret void
 }
 
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.sqrt.f32(float) #1
-
 attributes #0 = { nounwind "unsafe-fp-math"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind "unsafe-fp-math"="true" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
diff --git a/llvm/test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll b/llvm/test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll
index aeff6117d8fd1..8410964bcd90c 100644
--- a/llvm/test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll
+++ b/llvm/test/CodeGen/AMDGPU/read-register-invalid-subtarget.ll
@@ -2,8 +2,6 @@
 
 ; CHECK: invalid register "flat_scratch_lo" for subtarget.
 
-declare i32 @llvm.read_register.i32(metadata) #0
-
 define amdgpu_kernel void @test_invalid_read_flat_scratch_lo(ptr addrspace(1) %out) nounwind {
   store volatile i32 0, ptr addrspace(3) undef
   %m0 = call i32 @llvm.read_register.i32(metadata !0)
diff --git a/llvm/test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll b/llvm/test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll
index 3506a4730cda1..7d8176649141f 100644
--- a/llvm/test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/read-register-invalid-type-i32.ll
@@ -2,8 +2,6 @@
 
 ; CHECK: invalid type for register "exec".
 
-declare i32 @llvm.read_register.i32(metadata) #0
-
 define amdgpu_kernel void @test_invalid_read_exec(ptr addrspace(1) %out) nounwind {
   store volatile i32 0, ptr addrspace(3) undef
   %m0 = call i32 @llvm.read_register.i32(metadata !0)
diff --git a/llvm/test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll b/llvm/test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll
index 02ee219537d54..473722ae6303e 100644
--- a/llvm/test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/read-register-invalid-type-i64.ll
@@ -2,8 +2,6 @@
 
 ; CHECK: invalid type for register "m0".
 
-declare i64 @llvm.read_register.i64(metadata) #0
-
 define amdgpu_kernel void @test_invalid_read_m0(ptr addrspace(1) %out) #0 {
   %exec = call i64 @llvm.read_register.i64(metadata !0)
   store i64 %exec, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/read_register.ll b/llvm/test/CodeGen/AMDGPU/read_register.ll
index 227a08ecc4dbe..501d683fcaaa7 100644
--- a/llvm/test/CodeGen/AMDGPU/read_register.ll
+++ b/llvm/test/CodeGen/AMDGPU/read_register.ll
@@ -1,9 +1,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck %s
 
-declare i32 @llvm.read_register.i32(metadata) #0
-declare i64 @llvm.read_register.i64(metadata) #0
-
 ; CHECK-LABEL: {{^}}test_read_m0:
 ; CHECK: s_mov_b32 m0, -1
 ; CHECK: v_mov_b32_e32 [[COPY:v[0-9]+]], m0
diff --git a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
index fd422b344d834..79933c3996b74 100644
--- a/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
+++ b/llvm/test/CodeGen/AMDGPU/readcyclecounter.ll
@@ -11,8 +11,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
 
-declare i64 @llvm.readcyclecounter() #0
-
 ; GCN-LABEL: {{^}}test_readcyclecounter:
 ; MEMTIME-DAG: s_memtime s{{\[[0-9]+:[0-9]+\]}}
 ; GCN-DAG:     s_load_{{dwordx2|b64}}
diff --git a/llvm/test/CodeGen/AMDGPU/readsteadycounter.ll b/llvm/test/CodeGen/AMDGPU/readsteadycounter.ll
index 15f664c98182a..8ce85469d18ab 100644
--- a/llvm/test/CodeGen/AMDGPU/readsteadycounter.ll
+++ b/llvm/test/CodeGen/AMDGPU/readsteadycounter.ll
@@ -4,8 +4,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX1100
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GCN,GFX1100
 
-declare i64 @llvm.readsteadycounter() #0
-
 ; GCN-LABEL: {{^}}test_readsteadycounter:
 ; GFX700: s_mov_b32 s[[REG:[0-9]+]], 0
 ; GFX900: s_memrealtime s[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
diff --git a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
index 77e1694dbe7e1..47f4d1397a6eb 100644
--- a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
+++ b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll
@@ -3890,13 +3890,6 @@ entry:
   ret i64 %f
 }
 
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #2
-declare i32 @llvm.amdgcn.workgroup.id.x() #2
-declare i64 @llvm.amdgcn.mul.u24(i32, i32)
-declare i64 @llvm.amdgcn.mul.i24(i32, i32)
-
 attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
 attributes #1 = { mustprogress nofree nosync nounwind willreturn memory(read, argmem: readwrite, inaccessiblemem: none) }
 attributes #2 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/reassoc-scalar.ll b/llvm/test/CodeGen/AMDGPU/reassoc-scalar.ll
index 22fe358b3b6be..0b8d98427106d 100644
--- a/llvm/test/CodeGen/AMDGPU/reassoc-scalar.ll
+++ b/llvm/test/CodeGen/AMDGPU/reassoc-scalar.ll
@@ -124,6 +124,3 @@ bb:
   store volatile i32 1, ptr addrspace(1) %add2, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.amdgcn.workitem.id.y()
diff --git a/llvm/test/CodeGen/AMDGPU/reduction.ll b/llvm/test/CodeGen/AMDGPU/reduction.ll
index 53a036b617725..2475901f5e5da 100644
--- a/llvm/test/CodeGen/AMDGPU/reduction.ll
+++ b/llvm/test/CodeGen/AMDGPU/reduction.ll
@@ -561,6 +561,3 @@ entry:
   %res = extractelement <4 x half> %rdx.minmax.select3, i32 0
   ret half %res
 }
-
-declare <4 x half> @llvm.minnum.v4f16(<4 x half>, <4 x half>)
-declare <4 x half> @llvm.maxnum.v4f16(<4 x half>, <4 x half>)
diff --git a/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll b/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll
index 1f0e8c65a15e4..56fea308b47a2 100644
--- a/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/reg-coalescer-sched-crash.ll
@@ -4,8 +4,6 @@
 ; The register coalescer introduces a verifier error which later
 ; results in a crash during scheduling.
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 define amdgpu_kernel void @reg_coalescer_breaks_dead(ptr addrspace(1) nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3, i1 %c0) #1 {
 bb:
   %id.x = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/register-count-comments.ll b/llvm/test/CodeGen/AMDGPU/register-count-comments.ll
index 8d12b3fe626da..c3ceeffbb0b25 100644
--- a/llvm/test/CodeGen/AMDGPU/register-count-comments.ll
+++ b/llvm/test/CodeGen/AMDGPU/register-count-comments.ll
@@ -1,9 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs -asm-verbose < %s | FileCheck -check-prefix=SI %s
 ; RUN: llc -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs -asm-verbose -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s
 
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
-
 ; SI-LABEL: {{^}}foo:
 ; SI: .section	.AMDGPU.csdata
 ; SI: ; Kernel info:
diff --git a/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll b/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
index 04e995b6f343e..974b5023cce62 100644
--- a/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
+++ b/llvm/test/CodeGen/AMDGPU/remaining-virtual-register-operands.ll
@@ -24,9 +24,6 @@ define amdgpu_kernel void @alloc_failure_with_split_vregs(float %v0, float %v1)
   ret void
 }
 
-declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32 immarg, i32 immarg, i32 immarg) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
 attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
 attributes #1 = { convergent nounwind readnone willreturn }
 attributes #2 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/remat-fp64-constants.ll b/llvm/test/CodeGen/AMDGPU/remat-fp64-constants.ll
index a433509511584..03ffcb0d56ef1 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-fp64-constants.ll
+++ b/llvm/test/CodeGen/AMDGPU/remat-fp64-constants.ll
@@ -40,6 +40,3 @@ bb3:                                              ; preds = %bb3, %bb
   %i23 = icmp eq i32 %i22, 1024
   br i1 %i23, label %bb2, label %bb3
 }
-
-declare double @llvm.fma.f64(double, double, double)
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/remove-incompatible-extended-image-insts.ll b/llvm/test/CodeGen/AMDGPU/remove-incompatible-extended-image-insts.ll
index 0c67f00d7bebf..eca49c10930a4 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-incompatible-extended-image-insts.ll
+++ b/llvm/test/CodeGen/AMDGPU/remove-incompatible-extended-image-insts.ll
@@ -42,6 +42,4 @@ define void @caller(float noundef %0, float noundef %1, <8 x i32> noundef %2, <4
   ret void
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg)
-
 attributes #0 = { "target-features"="+extended-image-insts" }
diff --git a/llvm/test/CodeGen/AMDGPU/remove-incompatible-gws.ll b/llvm/test/CodeGen/AMDGPU/remove-incompatible-gws.ll
index 594fad389b6b9..17c9942f310b3 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-incompatible-gws.ll
+++ b/llvm/test/CodeGen/AMDGPU/remove-incompatible-gws.ll
@@ -45,9 +45,6 @@ define void @gws_caller(i32 %val0, i32 %val1) {
   ret void
 }
 
-declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #1
-declare void @llvm.amdgcn.ds.gws.init(i32, i32) #2
-
 
 attributes #0 = { "target-features"="+gws"}
 attributes #1 = { convergent inaccessiblememonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/remove-incompatible-s-time.ll b/llvm/test/CodeGen/AMDGPU/remove-incompatible-s-time.ll
index 2c2401f120cf5..55c0e4b6c2e5d 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-incompatible-s-time.ll
+++ b/llvm/test/CodeGen/AMDGPU/remove-incompatible-s-time.ll
@@ -64,9 +64,5 @@ define i64 @s_memtime_caller() {
   ret i64 %t
 }
 
-
-declare i64 @llvm.amdgcn.s.memrealtime()
-declare i64 @llvm.amdgcn.s.memtime()
-
 attributes #0 = { "target-features"="+s-memrealtime"}
 attributes #1 = { "target-features"="+s-memtime-inst"}
diff --git a/llvm/test/CodeGen/AMDGPU/remove-incompatible-wave32-feature.ll b/llvm/test/CodeGen/AMDGPU/remove-incompatible-wave32-feature.ll
index 8ef1d3ff27e51..f58bd9028fa9b 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-incompatible-wave32-feature.ll
+++ b/llvm/test/CodeGen/AMDGPU/remove-incompatible-wave32-feature.ll
@@ -42,9 +42,6 @@ define void @caller(ptr %out) {
   ret void
 }
 
-declare i32 @llvm.read_register.i32(metadata)
-declare i32 @llvm.ctpop.i32(i32)
-
 !0 = !{!"exec_lo"}
 
 attributes #0 = { "target-features"="+wavefrontsize32" }
diff --git a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
index 8c584a1890c9d..8ce62afb35f44 100644
--- a/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/reqd-work-group-size.ll
@@ -439,14 +439,6 @@ define amdgpu_kernel void @no_use_dispatch_ptr() {
   ret void
 }
 
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #1
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
-declare i32 @llvm.amdgcn.workgroup.id.y() #1
-declare i32 @llvm.amdgcn.workgroup.id.z() #1
-declare i32 @llvm.umin.i32(i32, i32) #1
-declare i32 @llvm.smin.i32(i32, i32) #1
-declare i32 @llvm.umax.i32(i32, i32) #1
-
 attributes #0 = { nounwind "uniform-work-group-size"="true" }
 attributes #1 = { nounwind readnone speculatable }
 attributes #2 = { nounwind "uniform-work-group-size"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
index a640ac985ade4..daa1713ff8c6f 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-optimization-remarks.ll
@@ -186,8 +186,6 @@ define amdgpu_kernel void @test_indirect_call() !dbg !9 {
 ; STDERR-NEXT: remark: foo.cl:74:0:     VGPRs Spill: 0
 ; STDERR-NEXT: remark: foo.cl:74:0:     LDS Size [bytes/block]: 0
 
-declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture readonly, i8, i64, i1 immarg)
-
 define amdgpu_kernel void @test_indirect_w_static_stack() !dbg !10 {
   %alloca = alloca <10 x i64>, align 16, addrspace(5)
   call void @llvm.memset.p5.i64(ptr addrspace(5) %alloca, i8 0, i64 40, i1 false)
diff --git a/llvm/test/CodeGen/AMDGPU/resource-usage-pal.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-pal.ll
index 05f1d5960beb2..2a2316d0def76 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-usage-pal.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-usage-pal.ll
@@ -13,5 +13,3 @@ define amdgpu_vs void @test() {
   call amdgpu_gfx void %1()
   ret void
 }
-
-declare i64 @llvm.amdgcn.s.getpc()
diff --git a/llvm/test/CodeGen/AMDGPU/ret.ll b/llvm/test/CodeGen/AMDGPU/ret.ll
index a0528825d24f1..e34eb089c90ca 100644
--- a/llvm/test/CodeGen/AMDGPU/ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret.ll
@@ -247,8 +247,6 @@ define amdgpu_ps float @ret_return_to_epilog_pseudo_size() #0 {
   ret float undef
 }
 
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind "InitialPSInputAddr"="0" }
 attributes #2 = { nounwind "InitialPSInputAddr"="1" }
diff --git a/llvm/test/CodeGen/AMDGPU/ret_jump.ll b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
index ad38d78ddb2ff..9f3413c39a270 100644
--- a/llvm/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret_jump.ll
@@ -108,22 +108,16 @@ ret.bb:                                          ; preds = %else, %main_body
 }
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.fabs.f32(float) #1
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.sqrt.f32(float) #1
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.floor.f32(float) #1
 
 attributes #0 = { "InitialPSInputAddr"="36983" }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/returnaddress.ll b/llvm/test/CodeGen/AMDGPU/returnaddress.ll
index 3d8e10b00f6d4..a155e302bcf00 100644
--- a/llvm/test/CodeGen/AMDGPU/returnaddress.ll
+++ b/llvm/test/CodeGen/AMDGPU/returnaddress.ll
@@ -79,5 +79,3 @@ entry:
   store volatile ptr %ret1, ptr addrspace(1) undef
   ret void
 }
-
-declare ptr @llvm.returnaddress(i32) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
index 85d0c054754d0..db6b57fa4b1a7 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
@@ -7,8 +7,6 @@
     unreachable, !dbg !11
   }
 
-  declare void @llvm.dbg.value(metadata, metadata, metadata) #0
-
   attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 
   !llvm.dbg.cu = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/rotl.ll b/llvm/test/CodeGen/AMDGPU/rotl.ll
index a87973d93ac77..0accf451b4cc9 100644
--- a/llvm/test/CodeGen/AMDGPU/rotl.ll
+++ b/llvm/test/CodeGen/AMDGPU/rotl.ll
@@ -276,8 +276,6 @@ entry:
   ret void
 }
 
-declare i16 @llvm.fshl.i16(i16, i16, i16)
-
 define void @test_rotl_i16(ptr addrspace(1) nocapture readonly %sourceA, ptr addrspace(1) nocapture readonly %sourceB, ptr addrspace(1) nocapture %destValues) {
 ; R600-LABEL: test_rotl_i16:
 ; R600:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/rotr.ll b/llvm/test/CodeGen/AMDGPU/rotr.ll
index 058ee589bc4b0..6e6f4798b43a6 100644
--- a/llvm/test/CodeGen/AMDGPU/rotr.ll
+++ b/llvm/test/CodeGen/AMDGPU/rotr.ll
@@ -233,8 +233,6 @@ entry:
   ret void
 }
 
-declare i16 @llvm.fshr.i16(i16, i16, i16)
-
 define void @test_rotr_i16(ptr addrspace(1) nocapture readonly %sourceA, ptr addrspace(1) nocapture readonly %sourceB, ptr addrspace(1) nocapture %destValues) {
 ; R600-LABEL: test_rotr_i16:
 ; R600:       ; %bb.0: ; %entry
diff --git a/llvm/test/CodeGen/AMDGPU/roundeven.ll b/llvm/test/CodeGen/AMDGPU/roundeven.ll
index 528dcfc6bc038..b529032c90934 100644
--- a/llvm/test/CodeGen/AMDGPU/roundeven.ll
+++ b/llvm/test/CodeGen/AMDGPU/roundeven.ll
@@ -1273,19 +1273,4 @@ define <2 x double> @v_roundeven_v2f64(<2 x double> %x) {
   ret <2 x double> %roundeven
 }
 
-declare half @llvm.roundeven.f16(half) #0
-declare <2 x half> @llvm.roundeven.v2f16(<2 x half>) #0
-declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) #0
-
-declare float @llvm.roundeven.f32(float) #0
-declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) #0
-declare <3 x float> @llvm.roundeven.v3f32(<3 x float>) #0
-declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) #0
-
-declare double @llvm.roundeven.f64(double) #0
-declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) #0
-
-declare half @llvm.fabs.f16(half) #0
-declare float @llvm.fabs.f32(float) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f32.ll b/llvm/test/CodeGen/AMDGPU/rsq.f32.ll
index 846fbdb33d668..b4d1142112df9 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f32.ll
@@ -12,11 +12,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=hawaii -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN-DAZ,GCN-DAZ-SAFE,CI-DAZ-SAFE %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=hawaii -denormal-fp-math-f32=ieee < %s          | FileCheck -check-prefixes=GCN-IEEE,GCN-IEEE-SAFE,CI-IEEE-SAFE %s
 
-
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-declare float @llvm.sqrt.f32(float) nounwind readnone
-declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) nounwind readnone
-
 define amdgpu_kernel void @rsq_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) {
 ; GCN-DAZ-UNSAFE-LABEL: rsq_f32:
 ; GCN-DAZ-UNSAFE:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index bd6e1f54e636d..43d59954fc9a4 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -5,13 +5,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,SDAG,VI-SDAG %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GISEL,VI-GISEL %s
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare i32 @llvm.amdgcn.readfirstlane(i32)
-declare double @llvm.sqrt.f64(double)
-declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
-declare double @llvm.amdgcn.sqrt.f64(double)
-declare double @llvm.fabs.f64(double)
-
 define amdgpu_ps <2 x i32> @s_rsq_f64(double inreg %x) {
 ; SI-SDAG-LABEL: s_rsq_f64:
 ; SI-SDAG:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/rv7x0_count3.ll b/llvm/test/CodeGen/AMDGPU/rv7x0_count3.ll
index 30f32e1f3e336..5753fa575b213 100644
--- a/llvm/test/CodeGen/AMDGPU/rv7x0_count3.ll
+++ b/llvm/test/CodeGen/AMDGPU/rv7x0_count3.ll
@@ -44,9 +44,6 @@ bb:
   ret void
 }
 
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-
 ; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) #0
 
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll b/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll
index 4ea77d1d1ac15..fac70edf4780b 100644
--- a/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll
+++ b/llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll
@@ -92,5 +92,3 @@ entry:
   store i64 %0, ptr addrspace(1) undef
   ret void
 }
-
-declare i64 @llvm.amdgcn.s.getpc()
diff --git a/llvm/test/CodeGen/AMDGPU/s_addk_i32.ll b/llvm/test/CodeGen/AMDGPU/s_addk_i32.ll
index 0b58b95050524..ec50fac475611 100644
--- a/llvm/test/CodeGen/AMDGPU/s_addk_i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/s_addk_i32.ll
@@ -114,7 +114,5 @@ define amdgpu_kernel void @commute_s_addk_i32(ptr addrspace(1) %out, i32 %b) #0
   ret void
 }
 
-declare i32 @llvm.amdgcn.groupstaticsize() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll b/llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll
index c7987d3d00917..303a6b6ebd852 100644
--- a/llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/s_mulk_i32.ll
@@ -51,7 +51,5 @@ define amdgpu_kernel void @commute_s_mulk_i32(ptr addrspace(1) %out, i32 %b) #0
   ret void
 }
 
-declare i32 @llvm.amdgcn.groupstaticsize() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/saddo.ll b/llvm/test/CodeGen/AMDGPU/saddo.ll
index bd3c422b52efc..a316e24356dc7 100644
--- a/llvm/test/CodeGen/AMDGPU/saddo.ll
+++ b/llvm/test/CodeGen/AMDGPU/saddo.ll
@@ -5,13 +5,6 @@
 ; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s --check-prefix=GFX10
 ; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck %s --check-prefix=GFX11
 
-
-declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
-declare { i64, i1 } @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
-
-
-declare { <2 x i32>, <2 x i1> } @llvm.sadd.with.overflow.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
 define amdgpu_kernel void @saddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
 ; SI-LABEL: saddo_i64_zext:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/saddsat.ll b/llvm/test/CodeGen/AMDGPU/saddsat.ll
index 9c5214338c54a..ec317f18be434 100644
--- a/llvm/test/CodeGen/AMDGPU/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/saddsat.ll
@@ -472,12 +472,3 @@ define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
   %result = call i64 @llvm.sadd.sat.i64(i64 %lhs, i64 %rhs)
   ret i64 %result
 }
-
-declare i8 @llvm.sadd.sat.i8(i8, i8) #0
-declare i16 @llvm.sadd.sat.i16(i16, i16) #0
-declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.sadd.sat.v3i16(<3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>) #0
-declare i32 @llvm.sadd.sat.i32(i32, i32) #0
-declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>) #0
-declare i64 @llvm.sadd.sat.i64(i64, i64) #0
diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
index 714b2af1698fe..0d2913a330a95 100644
--- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
+++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll
@@ -2,9 +2,6 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=CI -check-prefix=CI-NOHSA %s
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=CI --check-prefix=GCN-HSA %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-
 ; In this test both the pointer and the offset operands to the
 ; BUFFER_LOAD instructions end up being stored in vgprs.  This
 ; requires us to add the pointer and offset together, store the
diff --git a/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll b/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll
index 1b2dbec2b95c6..94276a0dbe764 100644
--- a/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll
+++ b/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll
@@ -36,9 +36,6 @@ entry:
   ret void
 }
 
-
-declare i32 @llvm.OpenCL.sampler.get.resource.id(i32) #0
-
 attributes #0 = { readnone }
 
 !opencl.kernels = !{!0, !1, !2}
diff --git a/llvm/test/CodeGen/AMDGPU/scalar-float-sop1.ll b/llvm/test/CodeGen/AMDGPU/scalar-float-sop1.ll
index 5e6849ec61b47..38b8956fa02fb 100644
--- a/llvm/test/CodeGen/AMDGPU/scalar-float-sop1.ll
+++ b/llvm/test/CodeGen/AMDGPU/scalar-float-sop1.ll
@@ -163,12 +163,3 @@ define amdgpu_vs half @frint_f16(half inreg %val) {
   %res = call half @llvm.rint.f16(half %val)
   ret half %res
 }
-
-declare float @llvm.ceil.f32(float)
-declare float @llvm.floor.f32(float)
-declare float @llvm.trunc.f32(float)
-declare float @llvm.rint.f32(float)
-declare half @llvm.ceil.f16(half)
-declare half @llvm.floor.f16(half)
-declare half @llvm.trunc.f16(half)
-declare half @llvm.rint.f16(half)
diff --git a/llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll b/llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll
index cf73803f8929d..fd4c7814abeac 100644
--- a/llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll
+++ b/llvm/test/CodeGen/AMDGPU/scalar-float-sop2.ll
@@ -201,12 +201,3 @@ bb:
   %i5 = fadd contract float %i4, %i1
   ret float %i5
 }
-
-declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
-declare float @llvm.minnum.f32(float, float)
-declare float @llvm.maxnum.f32(float, float)
-declare half @llvm.minnum.f16(half, half)
-declare half @llvm.maxnum.f16(half, half)
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float)
-declare float @llvm.fma.f32(float, float, float) nounwind readnone
-declare half @llvm.fma.f16(half, half, half) nounwind readnone
diff --git a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.v8i16.ll b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.v8i16.ll
index baee88b69d060..46cf74f63ea1b 100644
--- a/llvm/test/CodeGen/AMDGPU/scalar_to_vector.v8i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/scalar_to_vector.v8i16.ll
@@ -160,7 +160,5 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
index c0d199920bd94..574ff8a04593b 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
+++ b/llvm/test/CodeGen/AMDGPU/sched-crash-dbg-value.mir
@@ -138,17 +138,6 @@
   }
 
   declare void @func(float, i64, ptr addrspace(5))
-  declare void @llvm.lifetime.start.p5(i64, ptr addrspace(5) nocapture) #0
-  declare float @llvm.fmuladd.f32(float, float, float) #1
-  declare void @llvm.lifetime.end.p5(i64, ptr addrspace(5) nocapture) #0
-  declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
-  declare i32 @llvm.amdgcn.workitem.id.x() #1
-  declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-  declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #1
-  declare i32 @llvm.amdgcn.workitem.id.y() #1
-  declare i32 @llvm.amdgcn.workitem.id.z() #1
-  declare void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i32, i1) #0
-  declare void @llvm.memcpy.p1.p3.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(3) nocapture readonly, i64, i32, i1) #0
 
   attributes #0 = { argmemonly nounwind }
   attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/sched-setprio.ll b/llvm/test/CodeGen/AMDGPU/sched-setprio.ll
index a5e4b58018348..be02c344c57ad 100644
--- a/llvm/test/CodeGen/AMDGPU/sched-setprio.ll
+++ b/llvm/test/CodeGen/AMDGPU/sched-setprio.ll
@@ -1,8 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN %s
 
-declare void @llvm.amdgcn.s.setprio(i16)
-declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32)
-
 ; GCN-LABEL: {{^}}test_mfma_f32_4x4x1f32:
 ; GCN: s_setprio 1
 ; GCN: v_mfma
diff --git a/llvm/test/CodeGen/AMDGPU/sched.barrier.inverted.mask.ll b/llvm/test/CodeGen/AMDGPU/sched.barrier.inverted.mask.ll
index c20dbba42ccd4..b3ac6cd02736e 100644
--- a/llvm/test/CodeGen/AMDGPU/sched.barrier.inverted.mask.ll
+++ b/llvm/test/CodeGen/AMDGPU/sched.barrier.inverted.mask.ll
@@ -104,8 +104,5 @@ entry:
   ret void
 }
 
-declare void @llvm.amdgcn.sched.barrier(i32) #1
-declare void @llvm.amdcn.s.nop(i16) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-addrspaces.ll b/llvm/test/CodeGen/AMDGPU/schedule-addrspaces.ll
index 0139c52db1d58..2f52269875799 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-addrspaces.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-addrspaces.ll
@@ -21,5 +21,3 @@ define amdgpu_gfx void @example(<4 x i32> inreg %rsrc, ptr addrspace(5) %src, i3
   call void @llvm.amdgcn.raw.buffer.store.i32(i32 %x1, <4 x i32> %rsrc, i32 %dst1, i32 0, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-avoid-spills.ll b/llvm/test/CodeGen/AMDGPU/schedule-avoid-spills.ll
index 4096d329dbb27..2d07fc78178f1 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-avoid-spills.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-avoid-spills.ll
@@ -291,5 +291,3 @@ bb2:
   call void asm sideeffect "; use $0","{v[0:31]}"(<32 x i32> %vgpr3)
   ret void
 }
-
-declare float @llvm.fmuladd.f32(float, float, float)
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll b/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll
index ae779eb3410ff..6176c504ba369 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested-if.ll
@@ -76,10 +76,5 @@ ELSE17:                                           ; preds = %ELSE
   br label %ENDIF
 }
 
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll b/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll
index 57d3c7fc8bd01..8c20358c519bd 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-fs-loop-nested.ll
@@ -83,9 +83,5 @@ ENDIF30:                                          ; preds = %LOOP29
   br label %LOOP29
 }
 
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) #0
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-fs-loop.ll b/llvm/test/CodeGen/AMDGPU/schedule-fs-loop.ll
index 9f0b2119d0f4e..6176c504ba369 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-fs-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-fs-loop.ll
@@ -76,9 +76,5 @@ ELSE17:                                           ; preds = %ELSE
   br label %ENDIF
 }
 
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32) #0
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-if-2.ll b/llvm/test/CodeGen/AMDGPU/schedule-if-2.ll
index 1888df188ac07..ac24e9affa378 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-if-2.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-if-2.ll
@@ -89,6 +89,4 @@ IF23:                                             ; preds = %ELSE
 
 declare float @fabsf(float) #0
 
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
-
 attributes #0 = { readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-if.ll b/llvm/test/CodeGen/AMDGPU/schedule-if.ll
index 0e2a82d57044b..65455b2491af2 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-if.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-if.ll
@@ -42,5 +42,3 @@ IF13:                                             ; preds = %ELSE
   %23 = fadd float 1.000000e+03, %22
   br label %ENDIF
 }
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-ilp.ll b/llvm/test/CodeGen/AMDGPU/schedule-ilp.ll
index 11602b1d353f9..28d04b3b0b09e 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-ilp.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-ilp.ll
@@ -585,7 +585,6 @@ bb:
 }
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.fmuladd.f32(float, float, float) #1
 
 attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll
index bd1258cb1cf98..4d0a4a384d606 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit-clustering.ll
@@ -39,7 +39,5 @@ bb2:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { "amdgpu-num-vgpr"="9" }
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit.ll b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit.ll
index 3ba80380091bb..0ec852ed6af2e 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit.ll
@@ -586,6 +586,5 @@ bb:
 }
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.fmuladd.f32(float, float, float) #0
 
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll
index 2c9d24ee04ebf..cbd82762fd67f 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit2.ll
@@ -286,5 +286,4 @@ bb:
   store float %res.29, ptr addrspace(1) %adr.res.29, align 4
   ret void
 }
-declare float @llvm.fmuladd.f32(float, float, float) #0
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit3.ll b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit3.ll
index 96b40bca5e2e3..e5ea39d5058c5 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit3.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-limit3.ll
@@ -594,7 +594,6 @@ bb:
 }
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.fmuladd.f32(float, float, float) #0
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-misched-max-waves.ll b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-misched-max-waves.ll
index 26f9ba4c27808..b089a95522187 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-regpressure-misched-max-waves.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-regpressure-misched-max-waves.ll
@@ -104,7 +104,6 @@ bb:
 }
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.fmuladd.f32(float, float, float) #0
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-relaxed-occupancy.ll b/llvm/test/CodeGen/AMDGPU/schedule-relaxed-occupancy.ll
index 94815558bf3d6..d5ba9ee3ead40 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-relaxed-occupancy.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-relaxed-occupancy.ll
@@ -593,7 +593,6 @@ bb:
 }
 
 ; Function Attrs: nounwind readnone
-declare float @llvm.fmuladd.f32(float, float, float) #0
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { "amdgpu-wave-limiter"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll b/llvm/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll
index f6441f34cb619..3f79e957c446c 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop-failure.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -O0 -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -O0 -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare void @llvm.amdgcn.s.barrier() nounwind convergent
-
 ; GCN-LABEL: {{^}}main:
 define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1) {
 main_body:
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll b/llvm/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll
index b6c7a03b990fd..6a3292e43e245 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-vs-if-nested-loop.ll
@@ -126,5 +126,3 @@ ENDIF19:                                          ; preds = %ENDIF16
   %97 = fadd float %temp4.0, 1.000000e+00
   br label %LOOP
 }
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-xdl-resource.ll b/llvm/test/CodeGen/AMDGPU/schedule-xdl-resource.ll
index a7e361b4b67cc..641664739dfd7 100644
--- a/llvm/test/CodeGen/AMDGPU/schedule-xdl-resource.ll
+++ b/llvm/test/CodeGen/AMDGPU/schedule-xdl-resource.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -debug-only=machine-scheduler -verify-machineinstrs < %s 2>&1 | FileCheck -enable-var-scope %s
 ; REQUIRES: asserts
 
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half>, <4 x half>, <32 x float>, i32, i32, i32)
-
 ; CHECK: CritRes: {{[0-9]+}} HWXDL
 ; CHECK: Picking: Cand SU([[nid:[0-9]+]]) RES-DEMAND
 ; CHECK: Scheduling SU([[nid]]) {{.*}} V_MFMA_F32_32X32X4F16
diff --git a/llvm/test/CodeGen/AMDGPU/scheduler-rp-calc-one-successor-two-predecessors-bug.ll b/llvm/test/CodeGen/AMDGPU/scheduler-rp-calc-one-successor-two-predecessors-bug.ll
index 8cb1d250a6fa7..526bd3f9604a0 100644
--- a/llvm/test/CodeGen/AMDGPU/scheduler-rp-calc-one-successor-two-predecessors-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/scheduler-rp-calc-one-successor-two-predecessors-bug.ll
@@ -1,11 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s
 
-declare void @llvm.amdgcn.kill(i1)
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg)
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float)
-declare void @llvm.amdgcn.exp.compr.v2f16(i32 immarg, i32 immarg, <2 x half>, <2 x half>, i1 immarg, i1 immarg)
-
 define amdgpu_ps void @_amdgpu_ps_main(float %arg) {
 ; GFX900-LABEL: _amdgpu_ps_main:
 ; GFX900:       ; %bb.0: ; %bb
diff --git a/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll b/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll
index 1c912d09c47d1..25c7faf776467 100644
--- a/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll
@@ -45,10 +45,6 @@ main_body:
   ret void
 }
 
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg) #1
-declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.raw.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg, i32 immarg) #3
-
 attributes #0 = { nounwind "target-cpu"="tonga" }
 attributes #1 = { nounwind readnone willreturn }
 attributes #2 = { nounwind readonly willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/sdag-print-divergence.ll b/llvm/test/CodeGen/AMDGPU/sdag-print-divergence.ll
index a3489a8bb1403..a11889bdcf8ec 100644
--- a/llvm/test/CodeGen/AMDGPU/sdag-print-divergence.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdag-print-divergence.ll
@@ -29,5 +29,3 @@ entry:
   %ret = fadd float %sadd, %vector
   ret float %ret
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-op64-test.ll b/llvm/test/CodeGen/AMDGPU/sdwa-op64-test.ll
index 38a96eeaa3d0f..0f7b6517c61c8 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-op64-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-op64-test.ll
@@ -70,5 +70,3 @@ bb:
   store i64 %tmp19, ptr addrspace(1) %tmp17, align 8
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
index c9dbadcbd2315..b653b2219b365 100644
--- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll
@@ -2205,8 +2205,6 @@ bb2:
   br label %bb0
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll b/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll
index d807c3909e656..e5a2ea04d8783 100644
--- a/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll
@@ -1,8 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
-
-declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
-declare i32 @llvm.amdgcn.sffbh.i32(i32) nounwind readnone speculatable
 define amdgpu_kernel void @select_constant_cttz(ptr addrspace(1) noalias %out, ptr addrspace(1) nocapture readonly %arrayidx) nounwind {
 ; GCN-LABEL: select_constant_cttz:
 ; GCN:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
index c94bf01fa460a..2ebd0449e7f3f 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract-legacy.ll
@@ -37,8 +37,5 @@ define amdgpu_kernel void @select_fneg_posk_src_mul_legacy_f32(i32 %c) #2 {
   ret void
 }
 
-declare float @llvm.amdgcn.rcp.legacy(float) #1
-declare float @llvm.amdgcn.fmul.legacy(float, float) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
index 7c1da18de70f8..35eb2a3403aff 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.f16.ll
@@ -1771,8 +1771,4 @@ define half @select_fneg_posk_src_fmad_f16(i32 %c, half %x, half %z) {
   ret half %select
 }
 
-declare half @llvm.fabs.f16(half) #0
-declare half @llvm.fma.f16(half, half, half) #0
-declare half @llvm.fmuladd.f16(half, half, half) #0
-
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.legal.f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.legal.f16.ll
index ae3da60ef6c93..c41690ffa41af 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.legal.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.legal.f16.ll
@@ -18,6 +18,4 @@ define half @select_fneg_posk_src_rcp_f16(i32 %c, half %x, half %y) {
   ret half %select
 }
 
-declare half @llvm.amdgcn.rcp.f16(half) #0
-
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
index b18e34941d4fe..c362f038b45f6 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.ll
@@ -989,13 +989,5 @@ define amdgpu_kernel void @mul_select_negk_0_negfabs_f32(i32 %c) #0 {
   ret void
 }
 
-
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.fma.f32(float, float, float) #1
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare float @llvm.amdgcn.rcp.f32(float) #1
-declare float @llvm.amdgcn.rcp.legacy(float) #1
-declare float @llvm.amdgcn.fmul.legacy(float, float) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
index d2bb971b68030..0ea0abf2a29de 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
@@ -3364,8 +3364,4 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
   ret <2 x half> %select
 }
 
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0
-declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
-declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
-
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/select-undef.ll b/llvm/test/CodeGen/AMDGPU/select-undef.ll
index 5b9866a3c9157..2b175dc375603 100644
--- a/llvm/test/CodeGen/AMDGPU/select-undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-undef.ll
@@ -40,8 +40,6 @@ define void @select_undef_n2(ptr addrspace(1) %a, i32 %c) {
   ret void
 }
 
-declare float @llvm.amdgcn.rcp.f32(float)
-
 
 ; Make sure the vector undef isn't lowered into 0s.
 ; GCN-LABEL: {{^}}undef_v6f32:
diff --git a/llvm/test/CodeGen/AMDGPU/select-vectors.ll b/llvm/test/CodeGen/AMDGPU/select-vectors.ll
index cca44548bb8f8..373c1b9e56a62 100644
--- a/llvm/test/CodeGen/AMDGPU/select-vectors.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-vectors.ll
@@ -445,7 +445,6 @@ define amdgpu_kernel void @v_select_v4f16(ptr addrspace(1) %out, ptr addrspace(1
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll b/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll
index 81858bd3d29ee..338d593e09088 100644
--- a/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll
+++ b/llvm/test/CodeGen/AMDGPU/set-inactive-wwm-overwrite.ll
@@ -128,11 +128,6 @@ define amdgpu_cs void @if_else_vgpr_opt(ptr addrspace(8) inreg %input, ptr addrs
   ret void
 }
 
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
-declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0
-declare i32 @llvm.amdgcn.strict.wwm.i32(i32) #1
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32 immarg, i32 immarg) #2
-
 attributes #0 = { convergent nounwind readnone willreturn }
 attributes #1 = { convergent nounwind readnone speculatable willreturn }
 attributes #2 = { nounwind willreturn writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/set-wave-priority.ll b/llvm/test/CodeGen/AMDGPU/set-wave-priority.ll
index ab6877ac4e6d1..b64003ba47eca 100644
--- a/llvm/test/CodeGen/AMDGPU/set-wave-priority.ll
+++ b/llvm/test/CodeGen/AMDGPU/set-wave-priority.ll
@@ -202,5 +202,3 @@ b:
   %sub = fsub <2 x float> %add2, %v2
   ret <2 x float> %sub
 }
-
-declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/setcc-sext.ll b/llvm/test/CodeGen/AMDGPU/setcc-sext.ll
index 4432ac4a9e8ff..37a0121729b66 100644
--- a/llvm/test/CodeGen/AMDGPU/setcc-sext.ll
+++ b/llvm/test/CodeGen/AMDGPU/setcc-sext.ll
@@ -284,9 +284,4 @@ endif:
   ret void
 }
 
-
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/setcc.ll b/llvm/test/CodeGen/AMDGPU/setcc.ll
index 6ab49382b9049..c4555a9596171 100644
--- a/llvm/test/CodeGen/AMDGPU/setcc.ll
+++ b/llvm/test/CodeGen/AMDGPU/setcc.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=FUNC %s
 ; RUN: llc -mtriple=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=R600 -check-prefix=FUNC %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 ; FUNC-LABEL: {{^}}setcc_v2i32:
 ; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[3].X, KC0[3].Z
 ; R600-DAG: SETE_INT * T{{[0-9]+\.[XYZW]}}, KC0[2].W, KC0[3].Y
diff --git a/llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll b/llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll
index 2169ee117cbaa..ddfaa429e6fc4 100644
--- a/llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll
+++ b/llvm/test/CodeGen/AMDGPU/sext-divergence-driven-isel.ll
@@ -127,7 +127,5 @@ define amdgpu_kernel void @sext_i32_to_i64_divergent(ptr addrspace(1) %out, i32
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll b/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
index 38672da3c647b..953727a4bbca2 100644
--- a/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/sext-in-reg.ll
@@ -715,7 +715,5 @@ define amdgpu_kernel void @sext_in_reg_v3i8_to_v3i16(ptr addrspace(1) %out, <3 x
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
index b67ecc2f9d13c..45114efa0d648 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-control-flow.ll
@@ -222,6 +222,4 @@ endif:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-copy-local-cse.ll b/llvm/test/CodeGen/AMDGPU/sgpr-copy-local-cse.ll
index 8abd4b4302f54..4b279b01c851a 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-copy-local-cse.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-copy-local-cse.ll
@@ -23,5 +23,3 @@ entry:
   store float %sum, ptr addrspace(1) %pk
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
index a3cb3cfba5552..2225b4a90317b 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll
@@ -390,19 +390,6 @@ bb:
   ret void
 }
 
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.amdgcn.rsq.f32(float) #1
-declare float @llvm.exp2.f32(float) #1
-declare float @llvm.pow.f32(float, float) #1
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/sgprcopies.ll b/llvm/test/CodeGen/AMDGPU/sgprcopies.ll
index 5a66bff1ce61c..d5d80dec324ef 100644
--- a/llvm/test/CodeGen/AMDGPU/sgprcopies.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgprcopies.ll
@@ -51,8 +51,6 @@ for.end:                                          ; preds = %for.end.loopexit, %
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare float @llvm.fmuladd.f32(float, float, float) #1
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/shared-op-cycle.ll b/llvm/test/CodeGen/AMDGPU/shared-op-cycle.ll
index e0308bc56d172..74578b279d9b3 100644
--- a/llvm/test/CodeGen/AMDGPU/shared-op-cycle.ll
+++ b/llvm/test/CodeGen/AMDGPU/shared-op-cycle.ll
@@ -36,8 +36,5 @@ define amdgpu_vs void @main(<4 x float> inreg %reg0, <4 x float> inreg %reg1, <4
 }
 
 ; Function Attrs: readnone
-declare float @llvm.r600.dot4(<4 x float>, <4 x float>) #1
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
 
 attributes #1 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
index 21fcd3cd0dcd6..fffb038d16e6a 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-and-i128-ubfe.ll
@@ -159,9 +159,5 @@ define amdgpu_kernel void @v_uextract_bit_34_100_i128(ptr addrspace(1) %out, ptr
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll b/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
index bdc607552a0df..fcd0e68259a62 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-and-i64-ubfe.ll
@@ -390,9 +390,5 @@ define amdgpu_kernel void @v_uextract_bit_33_36_use_upper_half_shift_i64(ptr add
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/shift-select.ll b/llvm/test/CodeGen/AMDGPU/shift-select.ll
index 72069e19d51bb..84f9f870ef69f 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-select.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-select.ll
@@ -131,5 +131,3 @@ define amdgpu_kernel void @v_ashr_i64(ptr addrspace(1) %out, ptr addrspace(1) %i
   store i64 %result, ptr addrspace(1) %out
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll b/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll
index 0f60790ff396f..ba6501227a7d7 100644
--- a/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll
@@ -36,5 +36,3 @@ bb:
   store i32 %val, ptr addrspace(1) %arg, align 4
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index c440392153adb..78ed6948ec43a 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -3,10 +3,6 @@
 ; RUN: llc < %s -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=VI
 ; RUN: llc < %s -amdgpu-scalarize-global-loads=false  -mtriple=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs | FileCheck %s --check-prefixes=EG
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-
 define amdgpu_kernel void @shl_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; SI-LABEL: shl_v2i32:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
index b81af3eb838f1..f4279f4c85a8f 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
@@ -725,7 +725,5 @@ define amdgpu_kernel void @shl_v_imm_v4i16(ptr addrspace(1) %out, ptr addrspace(
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll b/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
index 945b92a3662ef..47febd7b3b959 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=SI %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 ; Test with inline immediate
 
 ; SI-LABEL: {{^}}shl_2_add_9_i32:
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
index 9127cc3ffb34e..151ffcbecf47d 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
@@ -6,9 +6,6 @@
 ; multiple uses. This is relevant to accessing 2 separate, adjacent
 ; LDS globals.
 
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 @lds0 = addrspace(3) global [512 x float] undef, align 4
 @lds1 = addrspace(3) global [512 x float] undef, align 4
 
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll
index c04cb89e9527b..641ef9f34cc91 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_csub.ll
@@ -17,6 +17,4 @@ define i32 @shl_base_atomicrmw_global_atomic_csub_ptr(ptr addrspace(1) %out, ptr
   ret i32 %val
 }
 
-declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #0
-
 attributes #0 = { argmemonly nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
index 39541537b3647..8546cea9815f5 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr_global.ll
@@ -45,7 +45,5 @@ define void @shl_base_global_ptr_global_atomic_fadd(ptr addrspace(1) %out, ptr a
   ret void
 }
 
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { argmemonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
index 09e342fe19066..06f7e7944f58e 100644
--- a/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/should-not-hoist-set-inactive.ll
@@ -68,8 +68,3 @@ bb4:
 bb5:
   ret void
 }
-
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32)
-declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg)
-declare i32 @llvm.amdgcn.strict.wwm.i32(i32)
-declare i32 @llvm.amdgcn.raw.buffer.atomic.add.i32(i32, <4 x i32>, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
index 1a55bf608ebf5..1b32485b9f874 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll
@@ -4171,7 +4171,5 @@ define amdgpu_kernel void @v_test_v2i16_x_add_neg32_undef(ptr addrspace(1) %out,
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-kill.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-kill.ll
index cef959f45437d..e6aa706bf4582 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-kill.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-kill.ll
@@ -120,12 +120,6 @@ endif2:
   ret float %.9
 }
 
-
-declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32 immarg) #2
-declare i1 @llvm.amdgcn.wqm.vote(i1) #3
-declare void @llvm.amdgcn.kill(i1) #4
-declare float @llvm.amdgcn.wqm.f32(float) #1
-
 attributes #1 = { nounwind readnone speculatable willreturn }
 attributes #2 = { nounwind willreturn memory(argmem: readwrite) }
 attributes #3 = { convergent nounwind readnone willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
index 2495c0dff8929..9333fa0c7e63c 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-noloop.ll
@@ -97,8 +97,5 @@ bb5:                                              ; preds = %bb3, %bb1
   ret void
 }
 
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll
index 165b996981e34..09477d441e77a 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf-unreachable.ll
@@ -34,7 +34,5 @@ bb5:                                              ; preds = %bb3, %bb1
   unreachable
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
index 1ab63762ecbd7..f32875a09b6b9 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
@@ -161,8 +161,6 @@ sw.epilog:
   ret void
 }
 
-declare float @llvm.fabs.f32(float) nounwind readnone
-
 define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
 ; SI-LABEL: loop_land_info_assert:
 ; SI:       ; %bb.0: ; %entry
@@ -246,6 +244,4 @@ return:
   ret void
 }
 
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll
index a7af02017001f..7a751da668f49 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll
@@ -93,7 +93,6 @@ exit:                                             ; preds = %loop
 }
 
 ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare void @llvm.dbg.value(metadata, metadata, metadata) #0
 
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotatecfg-multiple-backedges.ll b/llvm/test/CodeGen/AMDGPU/si-annotatecfg-multiple-backedges.ll
index 0edd9f4cd6b4f..8ecd58884edee 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotatecfg-multiple-backedges.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotatecfg-multiple-backedges.ll
@@ -58,4 +58,3 @@ loop_exit:
 }
 
 ; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll
index 917743bf5d14c..1c845e609ce60 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-kill.ll
@@ -68,6 +68,4 @@ endif:
   ret void
 }
 
-declare void @llvm.amdgcn.kill(i1) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
index 13745d4d5b171..e3c2e6b4dfe00 100644
--- a/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
@@ -73,8 +73,6 @@ ret:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.y() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll
index d34769ad0fcf0..fa0885f35e2b7 100644
--- a/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-optimize-vgpr-live-range-dbg-instr.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck -check-prefix=GCN %s
 
-declare void @llvm.dbg.value(metadata, metadata, metadata) #0
-
 define void @__omp_offloading_35_36570d3__ZN6openmc31process_advance_particle_eventsEv_l252_debug___omp_outlined_debug___omp_outlined(i1 %arg) {
 ; GCN-LABEL: __omp_offloading_35_36570d3__ZN6openmc31process_advance_particle_eventsEv_l252_debug___omp_outlined_debug___omp_outlined:
 ; GCN:       .Lfunc_begin0:
diff --git a/llvm/test/CodeGen/AMDGPU/si-scheduler-exports.ll b/llvm/test/CodeGen/AMDGPU/si-scheduler-exports.ll
index ac271ff6a258b..e571f1f6338c7 100644
--- a/llvm/test/CodeGen/AMDGPU/si-scheduler-exports.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-scheduler-exports.ll
@@ -20,6 +20,3 @@ entry:
   call void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float> zeroinitializer, ptr addrspace(8) zeroinitializer, i32 0, i32 0, i32 0, i32 0)
   ret void
 }
-
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg)
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/si-scheduler.ll b/llvm/test/CodeGen/AMDGPU/si-scheduler.ll
index 516fabce9c1cb..33ec7e31b89f6 100644
--- a/llvm/test/CodeGen/AMDGPU/si-scheduler.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-scheduler.ll
@@ -47,12 +47,6 @@ main_body:
   ret void
 }
 
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind readonly }
@@ -87,4 +81,3 @@ bb6:                                              ; preds = %.entry
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) #1
diff --git a/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll
index a14c456952db7..eb2b06be34ef0 100644
--- a/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-sgpr-spill.ll
@@ -1664,29 +1664,6 @@ ELSE214:                                          ; preds = %ELSE211
   br label %ENDIF209
 }
 
-declare float @llvm.exp2.f32(float) #1
-declare float @llvm.ceil.f32(float) #1
-declare float @llvm.fabs.f32(float) #1
-declare float @llvm.pow.f32(float, float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare float @llvm.amdgcn.rsq.f32(float) #1
-declare float @llvm.amdgcn.cubeid(float, float, float) #1
-declare float @llvm.amdgcn.cubesc(float, float, float) #1
-declare float @llvm.amdgcn.cubetc(float, float, float) #1
-declare float @llvm.amdgcn.cubema(float, float, float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll b/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll
index 7290b47658b3d..0da0fc1509155 100644
--- a/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-spill-cf.ll
@@ -492,13 +492,5 @@ ELSE2824:                                         ; preds = %ELSE2821
   br label %ENDIF2795
 }
 
-declare float @llvm.floor.f32(float) #1
-declare float @llvm.sqrt.f32(float) #1
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
index 8fec79a7efd9b..8afca077f6156 100644
--- a/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-triv-disjoint-mem-access.ll
@@ -327,11 +327,6 @@ define amdgpu_vs void @reorder_local_load_tbuffer_store_local_load(ptr addrspace
   ret void
 }
 
-declare void @llvm.memcpy.p3.p0(ptr addrspace(3), ptr, i64, i1)
-declare void @llvm.amdgcn.s.barrier() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-declare void @llvm.amdgcn.struct.ptr.tbuffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32, i32 immarg, i32 immarg) #3
-
 attributes #0 = { nounwind }
 attributes #1 = { convergent nounwind willreturn }
 attributes #2 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll b/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll
index f9a17783f0d35..5da99a5d5d88d 100644
--- a/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll
@@ -2,9 +2,6 @@
 ; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY
 ; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -verify-machineinstrs | FileCheck %s
 
-declare void @llvm.trap()
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 define amdgpu_kernel void @kernel(i32 %a, ptr addrspace(1) %x, i32 noundef %n) {
 ; This used to bypass the structurization process because structurizer is unable to
 ; handle multiple-exits CFG. This should be correctly structurized.
diff --git a/llvm/test/CodeGen/AMDGPU/sign_extend.ll b/llvm/test/CodeGen/AMDGPU/sign_extend.ll
index 9a03d216c7a99..0ffa02751b675 100644
--- a/llvm/test/CodeGen/AMDGPU/sign_extend.ll
+++ b/llvm/test/CodeGen/AMDGPU/sign_extend.ll
@@ -619,6 +619,4 @@ define amdgpu_kernel void @v_sext_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addr
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll b/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll
index 44678c70226e0..c4f0c9342fbd0 100644
--- a/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplify-libcalls2.ll
@@ -13,10 +13,6 @@ define void @foo(i32 %i) {
   ret void
 }
 
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
-declare void @llvm.dbg.value(metadata, metadata, metadata)
-
 !llvm.module.flags = !{!1}
 
 !0 = distinct !DICompileUnit(language: DW_LANG_C, file: !DIFile(filename: "1", directory: "1"))
diff --git a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
index fec2fcba83f4a..985895d090d9d 100644
--- a/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
+++ b/llvm/test/CodeGen/AMDGPU/simplifydemandedbits-recursion.ll
@@ -10,13 +10,10 @@
 @0 = external unnamed_addr addrspace(3) global [462 x float], align 4
 
 ; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.y() #0
 
 ; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 ; Function Attrs: nounwind readnone speculatable
-declare float @llvm.fmuladd.f32(float, float, float) #0
 
 ; CHECK: s_endpgm
 define amdgpu_kernel void @foo(ptr addrspace(1) noalias nocapture readonly %arg, ptr addrspace(1) noalias nocapture readonly %arg1, ptr addrspace(1) noalias nocapture %arg2, float %arg3, i1 %c0, i1 %c1, i1 %c2, i1 %c3, i1 %c4, i1 %c5) local_unnamed_addr !reqd_work_group_size !0 {
diff --git a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
index e1273e1a4bcd0..fae50979dc3bf 100644
--- a/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
+++ b/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll
@@ -28,13 +28,10 @@ exit:                                             ; preds = %endif1, %if1
   ret float %i24
 }
 ; Function Attrs: nounwind readonly willreturn
-declare <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
 
 ; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
-declare float @llvm.fma.f32(float, float, float) #2
 
 ; Function Attrs: nounwind
-declare void @llvm.amdgcn.kill(i1) #4
 
 attributes #1 = { nounwind readnone }
 attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
index d1f05358ff13a..317f733a41223 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.f64.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 define amdgpu_kernel void @sint_to_fp_i32_to_f64(ptr addrspace(1) %out, i32 %in) {
 ; CI-LABEL: sint_to_fp_i32_to_f64:
 ; CI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
index b03726817c1b4..01dc1a465d20c 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
@@ -1035,7 +1035,5 @@ define amdgpu_kernel void @v_sint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, pt
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll b/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll
index d4b0dfda5afda..ca734689efe8e 100644
--- a/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/sint_to_fp.ll
@@ -116,7 +116,5 @@ define amdgpu_kernel void @v_sint_to_fp_i1_f32_load(ptr addrspace(1) %out, ptr a
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll b/llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll
index 6f768641b5b03..6c4269e57012e 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-branch-trap.ll
@@ -48,10 +48,6 @@ end:
   ret void
 }
 
-declare void @llvm.trap() #0
-declare void @llvm.debugtrap() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
 attributes #0 = { nounwind noreturn }
 attributes #1 = { nounwind }
 attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index 138dd53b3ede4..4b6dd0d7526df 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -1963,13 +1963,6 @@ bb.1:
   ret void
 }
 
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
-declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare void @llvm.amdgcn.kill(i1) #0
-
-declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
 attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/smed3.ll b/llvm/test/CodeGen/AMDGPU/smed3.ll
index e0d0ddce208c4..0f3e3d19d9974 100644
--- a/llvm/test/CodeGen/AMDGPU/smed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/smed3.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 ; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i32:
 ; GCN: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
 define amdgpu_kernel void @v_test_smed3_r_i_i_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
@@ -90,8 +88,6 @@ define i64 @test_intMed3ImmCombine_no_32bit_extend(i64 %x) {
   %smin = call i64 @llvm.smin.i64(i64 %smax, i64 2)
   ret i64 %smin
 }
-declare i64 @llvm.smax.i64(i64, i64)
-declare i64 @llvm.smin.i64(i64, i64)
 
 ; GCN-LABEL: {{^}}v_test_smed3_r_i_i_i16:
 ; SI: v_med3_i32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
diff --git a/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll b/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll
index 233f4cc4fee50..777ed64419c8a 100644
--- a/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/smfmac_no_agprs.ll
@@ -50,4 +50,3 @@ entry:
   store i32 %vecext.3, ptr addrspace(1) %arrayidx4
   ret void
 }
-declare <4 x i32> @llvm.amdgcn.smfmac.i32.16x16x64.i8(<2 x i32>, <4 x i32>, <4 x i32>, i32, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.ll b/llvm/test/CodeGen/AMDGPU/sminmax.ll
index 65ae200275a0e..d95f573b0d4ae 100644
--- a/llvm/test/CodeGen/AMDGPU/sminmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/sminmax.ll
@@ -268,7 +268,5 @@ define amdgpu_kernel void @v_min_max_i32_user(ptr addrspace(1) %out0, ptr addrsp
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll
index d7a6be5110691..9bf3879e093ea 100644
--- a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll
@@ -232,7 +232,5 @@ define amdgpu_kernel void @u_min_max_v2i16(ptr addrspace(1) %out0, ptr addrspace
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/smrd-gfx10.ll b/llvm/test/CodeGen/AMDGPU/smrd-gfx10.ll
index d122e4d77926a..669bcdc006b24 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd-gfx10.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd-gfx10.ll
@@ -37,6 +37,4 @@ main_body:
   ret float %r
 }
 
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32)
-
 !0 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll
index 4ce9260b8d53d..bf32897d73c87 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd.ll
@@ -739,22 +739,6 @@ define amdgpu_ps void @s_buffer_load_v16f32(<4 x i32> inreg %rsrc, i32 inreg %of
   ret void
 }
 
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
-
-declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32) #1
-declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
-declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)
-declare <8 x i32> @llvm.amdgcn.s.buffer.load.v8i32(<4 x i32>, i32, i32)
-declare <16 x i32> @llvm.amdgcn.s.buffer.load.v16i32(<4 x i32>, i32, i32)
-
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32)
-declare <2 x float> @llvm.amdgcn.s.buffer.load.v2f32(<4 x i32>, i32, i32)
-declare <4 x float> @llvm.amdgcn.s.buffer.load.v4f32(<4 x i32>, i32, i32)
-declare <8 x float> @llvm.amdgcn.s.buffer.load.v8f32(<4 x i32>, i32, i32)
-declare <16 x float> @llvm.amdgcn.s.buffer.load.v16f32(<4 x i32>, i32, i32)
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll b/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll
index 6312816a40c24..892e9a815b1c9 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd_vmem_war.ll
@@ -24,6 +24,5 @@ bb8:                                              ; preds = %bb3, %bb
   ret void
 }
 ; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll b/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll
index 3176257920a7a..728af6372f835 100644
--- a/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll
+++ b/llvm/test/CodeGen/AMDGPU/soft-clause-exceeds-register-budget.ll
@@ -558,11 +558,6 @@ for.cond.cleanup26:                               ; preds = %for.cond28.preheade
   ret void
 }
 
-declare float @llvm.fmuladd.f32(float, float, float) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
-declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
-
 attributes #0 = { nofree nosync nounwind readnone speculatable willreturn }
 attributes #1 = { nounwind readnone speculatable willreturn }
 
diff --git a/llvm/test/CodeGen/AMDGPU/sopk-compares.ll b/llvm/test/CodeGen/AMDGPU/sopk-compares.ll
index 8b166b4c1bf3f..652196214886b 100644
--- a/llvm/test/CodeGen/AMDGPU/sopk-compares.ll
+++ b/llvm/test/CodeGen/AMDGPU/sopk-compares.ll
@@ -3,7 +3,6 @@
 
 ; Since this intrinsic is exposed as a constant after isel, use it to
 ; defeat the DAG's compare with constant canonicalizations.
-declare i32 @llvm.amdgcn.groupstaticsize() #1
 
 @lds = addrspace(3) global [512 x i32] undef, align 4
 
diff --git a/llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll b/llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll
index fc700c55d7ee5..20fd8df343750 100644
--- a/llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll
+++ b/llvm/test/CodeGen/AMDGPU/sopk-no-literal.ll
@@ -21,7 +21,5 @@ define amdgpu_kernel void @test_sopk_size(i32 %var.mode) {
   ret void
 }
 
-declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)
-
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-agpr.ll
index c9413b61758d1..97ac1acbcc220 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.ll
@@ -116,11 +116,6 @@ define amdgpu_kernel void @max_6regs_used_8a(ptr addrspace(1) %arg) #4 {
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32)
-declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32)
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
-
 attributes #1 = { nounwind "amdgpu-num-vgpr"="10" }
 attributes #2 = { nounwind "amdgpu-num-vgpr"="12" }
 attributes #3 = { nounwind "amdgpu-num-vgpr"="32" }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll b/llvm/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll
index cc42077475cae..cadb3bb04a642 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-alloc-sgpr-init-bug.ll
@@ -22,7 +22,4 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #0
-
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-cfg-position.ll b/llvm/test/CodeGen/AMDGPU/spill-cfg-position.ll
index c3b6d8d761f26..c3c188cab7ea9 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-cfg-position.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-cfg-position.ll
@@ -73,6 +73,4 @@ bb52:                                             ; preds = %bb44, %bb36
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-m0.ll b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
index b2235544686f1..275821c0437a4 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-m0.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-m0.ll
@@ -182,12 +182,6 @@ ret:
   ret void
 }
 
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare float @llvm.amdgcn.wqm.f32(float) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 
diff --git a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
index bea2e6d4b45a3..49a4c8f4e8a21 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll
@@ -11086,9 +11086,6 @@ ret:
   ret void
 }
 
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
-
 attributes #0 = { "amdgpu-waves-per-eu"="10,10" }
 attributes #1 = { nounwind readnone }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
index d5f97314f9324..a3f114f33753f 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll
@@ -27,6 +27,4 @@ define amdgpu_kernel void @test_spill_av_class(<4 x i32> %arg) #0 {
   ret void
 }
 
-declare <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32, i32, <4 x i32>, i32, i32, i32)
-
 attributes #0 = { nounwind "amdgpu-num-vgpr"="5" }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
index 81dd2c4457b2f..e6925e6631529 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
@@ -88,5 +88,3 @@ bb.4:                                                ; preds = %bb.3
 bb.5:                                                ; preds = %bb.3
   ret void
 }
-
-declare i32 @llvm.amdgcn.readfirstlane(i32)
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr.ll
index 9eacb88066c0f..8796b57dc6cba 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr.ll
@@ -216,9 +216,6 @@ st:
   ret void
 }
 
-
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 attributes #0 = { nounwind "amdgpu-num-vgpr"="10" }
 attributes #1 = { "amdgpu-flat-work-group-size"="1,256" }
 attributes #2 = { nounwind "amdgpu-num-vgpr"="11" }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-writelane-vgprs.ll b/llvm/test/CodeGen/AMDGPU/spill-writelane-vgprs.ll
index c1c69ce568a9c..15dfbc0ddc46c 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-writelane-vgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-writelane-vgprs.ll
@@ -4,8 +4,6 @@
 
 ; Callee must preserve the VGPR modified by writelane even if it is marked Caller-saved.
 
-declare i32 @llvm.amdgcn.writelane(i32, i32, i32)
-
 define void @sgpr_spill_writelane() {
 ; GCN-LABEL: sgpr_spill_writelane:
 ; GCN:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
index 6d91c33fd2876..1e04664cf6d82 100644
--- a/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-arg-dbg-value.ll
@@ -149,8 +149,6 @@ define hidden ptr addrspace(1) @split_ptr_arg(ptr addrspace(1) readnone returned
   ret ptr addrspace(1) %arg, !dbg !78
 }
 
-declare void @llvm.dbg.value(metadata, metadata, metadata) #1
-
 attributes #0 = { norecurse nounwind readnone }
 attributes #1 = { nounwind readnone speculatable }
 
diff --git a/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll b/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
index 22ed0f37c1b3d..d6b050df19503 100644
--- a/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-scalar-i64-add.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() readnone
-
 ; This is broken because the low half of the 64-bit add remains on the
 ; SALU, but the upper half does not. The addc expects the carry bit
 ; set in vcc, which is undefined since the low scalar half add sets
diff --git a/llvm/test/CodeGen/AMDGPU/split-smrd.ll b/llvm/test/CodeGen/AMDGPU/split-smrd.ll
index b6087e1108b10..9a7b1e4c72abb 100644
--- a/llvm/test/CodeGen/AMDGPU/split-smrd.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-smrd.ll
@@ -28,11 +28,6 @@ bb3:                                              ; preds = %bb
   ret void
 }
 
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll b/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
index a5dcd9284edd0..957a220f44887 100644
--- a/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
+++ b/llvm/test/CodeGen/AMDGPU/split-vector-memoperand-offsets.ll
@@ -74,10 +74,5 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare i32 @llvm.amdgcn.workitem.id.y() #1
-declare i32 @llvm.amdgcn.workitem.id.z() #1
-
 attributes #0 = { norecurse nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
index b87439a9d6fae..6c4e4d458a283 100644
--- a/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
+++ b/llvm/test/CodeGen/AMDGPU/splitkit-getsubrangeformask.ll
@@ -759,11 +759,3 @@ define amdgpu_gs void @_amdgpu_gs_main(i32 inreg %primShaderTableAddrLow, <31 x
   call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %381, i32 15, i32 undef, i32 undef, <8 x i32> %380, i32 0, i32 0)
   ret void
 }
-
-declare i32 @llvm.amdgcn.readfirstlane(i32)
-declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
-declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg)
-declare i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32>, i32, i32, i32, i32 immarg)
-declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32 immarg)
-declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/sra.ll b/llvm/test/CodeGen/AMDGPU/sra.ll
index ae0221b8b32b3..3917cc0201d09 100644
--- a/llvm/test/CodeGen/AMDGPU/sra.ll
+++ b/llvm/test/CodeGen/AMDGPU/sra.ll
@@ -3,8 +3,6 @@
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefixes=VI
 ; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s -check-prefixes=EG
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 define amdgpu_kernel void @ashr_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; SI-LABEL: ashr_v2i32:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/srl.ll b/llvm/test/CodeGen/AMDGPU/srl.ll
index 418c160d4244a..b80750dff236b 100644
--- a/llvm/test/CodeGen/AMDGPU/srl.ll
+++ b/llvm/test/CodeGen/AMDGPU/srl.ll
@@ -3,8 +3,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s -check-prefixes=VI
 ; RUN: llc -amdgpu-scalarize-global-loads=false  -mtriple=r600 -mcpu=redwood < %s | FileCheck %s -check-prefixes=EG
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 define amdgpu_kernel void @lshr_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; SI-LABEL: lshr_i32:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/ssubo.ll b/llvm/test/CodeGen/AMDGPU/ssubo.ll
index ed8b442a491d6..311c629766264 100644
--- a/llvm/test/CodeGen/AMDGPU/ssubo.ll
+++ b/llvm/test/CodeGen/AMDGPU/ssubo.ll
@@ -2,11 +2,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs< %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
 
-
-declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
-declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
-declare { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
 ; FUNC-LABEL: {{^}}ssubo_i64_zext:
 define amdgpu_kernel void @ssubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
   %ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/ssubsat.ll
index 71017f15e3c6d..877b331049747 100644
--- a/llvm/test/CodeGen/AMDGPU/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/ssubsat.ll
@@ -1097,17 +1097,4 @@ define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
   ret i64 %result
 }
 
-declare i8 @llvm.ssub.sat.i8(i8, i8) #0
-declare i16 @llvm.ssub.sat.i16(i16, i16) #0
-declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) #0
-declare i32 @llvm.ssub.sat.i32(i32, i32) #0
-declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) #0
-declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>) #0
-declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>) #0
-declare i64 @llvm.ssub.sat.i64(i64, i64) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll b/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll
index e378a83cff50d..ba706e49815a8 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-size-overflow.ll
@@ -1,8 +1,6 @@
 ; RUN: not llc -mtriple=amdgcn-- < %s 2>&1 | FileCheck -check-prefix=ERROR %s
 ; RUN: not llc -mtriple=amdgcn-- < %s | FileCheck -check-prefix=GCN %s
 
-declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture, i8, i32, i32, i1) #1
-
 ; ERROR: error: <unknown>:0:0: stack frame size (131064) exceeds limit (131056) in function 'stack_size_limit_wave64'
 ; GCN: ; ScratchSize: 131064
 define amdgpu_kernel void @stack_size_limit_wave64() #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.invalid.ll b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.invalid.ll
index 13dcaaf35132a..cb8e8d3f94a4b 100644
--- a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.invalid.ll
+++ b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.invalid.ll
@@ -10,8 +10,6 @@
 
 ;--- stacksave-error.ll
 
-declare ptr @llvm.stacksave.p0()
-
 ; ERR-SAVE-SDAG: LLVM ERROR: Cannot select: {{.+}}: i64,ch = stacksave
 ; ERR-SAVE-GISEL: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(p0) = G_STACKSAVE (in function: func_store_stacksave)
 define void @func_store_stacksave() {
@@ -22,8 +20,6 @@ define void @func_store_stacksave() {
 
 ;--- stackrestore-error.ll
 
-declare void @llvm.stackrestore.p0(ptr)
-
 ; ERR-RESTORE-SDAG: LLVM ERROR: Cannot select: {{.+}}: ch = stackrestore {{.+}}, {{.+}}
 ; ERR-RESTORE-GISEL: LLVM ERROR: unable to legalize instruction: G_STACKRESTORE %{{[0-9]+}}:_(p0) (in function: func_stacksave_sgpr)
 define amdgpu_gfx void @func_stacksave_sgpr(ptr inreg %stack) {
diff --git a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
index b8bc01e0b879b..dcb5b2570c7a1 100644
--- a/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
+++ b/llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
@@ -6,9 +6,6 @@
 ; RUN: llc -O0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -mattr=+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,WAVE64,WAVE64-O0 %s
 ; RUN: llc -O0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 -amdgpu-prealloc-sgpr-spill-vgprs=1 < %s | FileCheck -check-prefixes=GCN,WAVE32,WAVE32-WWM-PREALLOC %s
 
-declare ptr addrspace(5) @llvm.stacksave.p5()
-declare void @llvm.stackrestore.p5(ptr addrspace(5))
-
 define hidden void @stack_passed_argument([32 x i32], i32) {
 ; GCN-LABEL: stack_passed_argument:
 ; GCN:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/store-barrier.ll b/llvm/test/CodeGen/AMDGPU/store-barrier.ll
index af48d7eca30f8..02529333084bf 100644
--- a/llvm/test/CodeGen/AMDGPU/store-barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-barrier.ll
@@ -37,7 +37,6 @@ bb:
 }
 
 ; Function Attrs: convergent nounwind
-declare void @llvm.amdgcn.s.barrier() #1
 
 attributes #0 = { nounwind }
 attributes #1 = { convergent nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/store-clobbers-load.ll b/llvm/test/CodeGen/AMDGPU/store-clobbers-load.ll
index ef11bfdc992e4..e31ed172446ac 100644
--- a/llvm/test/CodeGen/AMDGPU/store-clobbers-load.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-clobbers-load.ll
@@ -17,9 +17,6 @@ entry:
   store i32 %rslt, ptr addrspace(1) %out, align 4
   ret void
 }
-
-
-declare i32 @llvm.amdgcn.workitem.id.x()
 @lds0 = addrspace(3) global [512 x i32] undef, align 4
 
 ; To check that %arrayidx0 is not marked as amdgpu.noclobber.
diff --git a/llvm/test/CodeGen/AMDGPU/store_typed.ll b/llvm/test/CodeGen/AMDGPU/store_typed.ll
index c5a7111fefa0e..c831cde107497 100644
--- a/llvm/test/CodeGen/AMDGPU/store_typed.ll
+++ b/llvm/test/CodeGen/AMDGPU/store_typed.ll
@@ -20,5 +20,3 @@ define amdgpu_kernel void @store_typed_rat11(<4 x i32> %data, <4 x i32> %index)
   call void @llvm.r600.rat.store.typed(<4 x i32> %data, <4 x i32> %index, i32 11)
   ret void
 }
-
-declare void @llvm.r600.rat.store.typed(<4 x i32>, <4 x i32>, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
index f3be926f76bef..848ad5190e3a5 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f16.ll
@@ -248,10 +248,5 @@ define amdgpu_ps <2 x half> @s_constained_fadd_v2f16_fpexcept_strict(<2 x half>
   ret <2 x half> %val
 }
 
-declare half @llvm.experimental.constrained.fadd.f16(half, half, metadata, metadata) #1
-declare <2 x half> @llvm.experimental.constrained.fadd.v2f16(<2 x half>, <2 x half>, metadata, metadata) #1
-declare <3 x half> @llvm.experimental.constrained.fadd.v3f16(<3 x half>, <3 x half>, metadata, metadata) #1
-declare <4 x half> @llvm.experimental.constrained.fadd.v4f16(<4 x half>, <4 x half>, metadata, metadata) #1
-
 attributes #0 = { strictfp }
 attributes #1 = { inaccessiblememonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
index 2aecf5fd8753c..5dc5e02400815 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f32.ll
@@ -217,9 +217,4 @@ define float @v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs(float %x, floa
   ret float %val
 }
 
-declare float @llvm.fabs.f32(float)
-declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
-declare <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll
index faa0131c88c2d..f33828f97236a 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fadd.f64.ll
@@ -142,9 +142,5 @@ define amdgpu_ps <2 x float> @s_constained_fadd_f64_fpexcept_strict(double inreg
   ret <2 x float> %cast
 }
 
-declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) #1
-declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata) #1
-declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata) #1
-
 attributes #0 = { strictfp }
 attributes #1 = { inaccessiblememonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
index 407bb002483ec..95eaab1e0bd2f 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fma.f16.ll
@@ -280,10 +280,4 @@ define <2 x half> @v_constained_fma_v2f16_fpexcept_strict_fneg_fneg(<2 x half> %
   ret <2 x half> %val
 }
 
-declare half @llvm.fabs.f16(half)
-declare half @llvm.experimental.constrained.fma.f16(half, half, half, metadata, metadata)
-declare <2 x half> @llvm.experimental.constrained.fma.v2f16(<2 x half>, <2 x half>, <2 x half>, metadata, metadata)
-declare <3 x half> @llvm.experimental.constrained.fma.v3f16(<3 x half>, <3 x half>, <3 x half>, metadata, metadata)
-declare <4 x half> @llvm.experimental.constrained.fma.v4f16(<4 x half>, <4 x half>, <4 x half>, metadata, metadata)
-
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll
index 922a4dcbea351..db0843dfbc42f 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fma.f32.ll
@@ -152,10 +152,4 @@ define <2 x float> @v_constained_fma_v2f32_fpexcept_strict_fneg_fneg(<2 x float>
   ret <2 x float> %val
 }
 
-declare float @llvm.fabs.f32(float)
-declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
-declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.fma.v3f32(<3 x float>, <3 x float>, <3 x float>, metadata, metadata)
-declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
-
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll
index 5cb293ab25806..05ea0e9a43901 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fma.f64.ll
@@ -152,10 +152,4 @@ define <2 x double> @v_constained_fma_v2f64_fpexcept_strict_fneg_fneg(<2 x doubl
   ret <2 x double> %val
 }
 
-declare double @llvm.fabs.f64(double)
-declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
-declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
-declare <3 x double> @llvm.experimental.constrained.fma.v3f64(<3 x double>, <3 x double>, <3 x double>, metadata, metadata)
-declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata)
-
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
index d798166a67839..41012bd7dfdbb 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f16.ll
@@ -362,11 +362,6 @@ define amdgpu_ps <2 x half> @s_constained_fmul_v2f16_fpexcept_strict(<2 x half>
   ret <2 x half> %val
 }
 
-declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata) #1
-declare <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half>, <2 x half>, metadata, metadata) #1
-declare <3 x half> @llvm.experimental.constrained.fmul.v3f16(<3 x half>, <3 x half>, metadata, metadata) #1
-declare <4 x half> @llvm.experimental.constrained.fmul.v4f16(<4 x half>, <4 x half>, metadata, metadata) #1
-
 attributes #0 = { strictfp }
 attributes #1 = { inaccessiblememonly nounwind willreturn }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll
index 8df2834928395..d7630888b8196 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f32.ll
@@ -220,9 +220,4 @@ define float @v_constained_fmul_f32_fpexcept_strict_fneg_fabs_lhs(float %x, floa
   ret float %val
 }
 
-declare float @llvm.fabs.f32(float)
-declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
-declare <2 x float> @llvm.experimental.constrained.fmul.v2f32(<2 x float>, <2 x float>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll
index 8c98a662c59cc..c46b0a8c4d16f 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fmul.f64.ll
@@ -145,9 +145,5 @@ define amdgpu_ps <2 x float> @s_constained_fmul_f64_fpexcept_strict(double inreg
   ret <2 x float> %cast
 }
 
-declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata) #1
-declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata) #1
-declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata) #1
-
 attributes #0 = { strictfp }
 attributes #1 = { inaccessiblememonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fpext.ll b/llvm/test/CodeGen/AMDGPU/strict_fpext.ll
index df6c1a0aa8c46..855c44b0c3688 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fpext.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fpext.ll
@@ -481,17 +481,5 @@ define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_noabi(ptr addrspace(1) %p
   ret <2 x float> %result
 }
 
-declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) #1
-declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) #1
-declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata) #1
-
-declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata) #1
-declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata) #1
-declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half>, metadata) #1
-
-declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #1
-declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #1
-declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #1
-
 attributes #0 = { strictfp }
 attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll b/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
index 0f80327638a9c..616f2a3d298ae 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fptrunc.ll
@@ -379,18 +379,4 @@ define void @v_constrained_fptrunc_f32_to_f16_fpexcept_strict_noabi_fabs(float %
   ret void
 }
 
-declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
-declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata)
-declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata)
-
-declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
-declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
-
-declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
-declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata)
-declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f64(<3 x double>, metadata, metadata)
-
-declare float @llvm.fabs.f32(float)
-
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
index 3420596da2aac..9cf5dbfbf4496 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll
@@ -542,11 +542,6 @@ define amdgpu_ps <2 x half> @s_constained_fsub_v2f16_fpexcept_strict(<2 x half>
   ret <2 x half> %val
 }
 
-declare half @llvm.experimental.constrained.fsub.f16(half, half, metadata, metadata) #1
-declare <2 x half> @llvm.experimental.constrained.fsub.v2f16(<2 x half>, <2 x half>, metadata, metadata) #1
-declare <3 x half> @llvm.experimental.constrained.fsub.v3f16(<3 x half>, <3 x half>, metadata, metadata) #1
-declare <4 x half> @llvm.experimental.constrained.fsub.v4f16(<4 x half>, <4 x half>, metadata, metadata) #1
-
 attributes #0 = { strictfp }
 attributes #1 = { inaccessiblememonly nounwind willreturn }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll
index 23dbe21379f7f..ffded143a9408 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f32.ll
@@ -222,9 +222,4 @@ define float @v_constained_fsub_f32_fpexcept_strict_fneg_fabs_lhs(float %x, floa
   ret float %val
 }
 
-declare float @llvm.fabs.f32(float)
-declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
-declare <2 x float> @llvm.experimental.constrained.fsub.v2f32(<2 x float>, <2 x float>, metadata, metadata)
-declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
-
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll
index e7d136c377079..7b7f42b38815d 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_fsub.f64.ll
@@ -145,9 +145,5 @@ define amdgpu_ps <2 x float> @s_constained_fsub_f64_fpexcept_strict(double inreg
   ret <2 x float> %cast
 }
 
-declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata) #1
-declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata) #1
-declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata) #1
-
 attributes #0 = { strictfp }
 attributes #1 = { inaccessiblememonly nounwind willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
index 1f3f17c3e0c46..dc5baefd4d3c7 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f16.ll
@@ -376,13 +376,6 @@ define <4 x half> @test_ldexp_v4f16_v4i32(ptr addrspace(1) %out, <4 x half> %a,
   ret <4 x half> %result
 }
 
-declare half @llvm.experimental.constrained.ldexp.f16.i16(half, i16, metadata, metadata) #1
-declare half @llvm.experimental.constrained.ldexp.f16.i32(half, i32, metadata, metadata) #1
-declare <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i16(<2 x half>, <2 x i16>, metadata, metadata) #1
-declare <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i32(<2 x half>, <2 x i32>, metadata, metadata) #1
-declare <3 x half> @llvm.experimental.constrained.ldexp.v3f16.v3i32(<3 x half>, <3 x i32>, metadata, metadata) #1
-declare <4 x half> @llvm.experimental.constrained.ldexp.v4f16.v4i32(<4 x half>, <4 x i32>, metadata, metadata) #1
-
 attributes #0 = { strictfp }
 attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll
index 456f0bd2434fb..468ebecad9e65 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f32.ll
@@ -236,13 +236,6 @@ define <4 x float> @test_ldexp_v4f32_v4i32(ptr addrspace(1) %out, <4 x float> %a
   ret <4 x float> %result
 }
 
-declare float @llvm.experimental.constrained.ldexp.f32.i16(float, i16, metadata, metadata) #1
-declare float @llvm.experimental.constrained.ldexp.f32.i32(float, i32, metadata, metadata) #1
-declare <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i16(<2 x float>, <2 x i16>, metadata, metadata) #1
-declare <2 x float> @llvm.experimental.constrained.ldexp.v2f32.v2i32(<2 x float>, <2 x i32>, metadata, metadata) #1
-declare <3 x float> @llvm.experimental.constrained.ldexp.v3f32.v3i32(<3 x float>, <3 x i32>, metadata, metadata) #1
-declare <4 x float> @llvm.experimental.constrained.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>, metadata, metadata) #1
-
 attributes #0 = { strictfp }
 attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll
index b31625f8f7073..616effe72fd44 100644
--- a/llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/strict_ldexp.f64.ll
@@ -65,13 +65,6 @@ define <4 x double> @test_ldexp_v4f64_v4i32(ptr addrspace(1) %out, <4 x double>
   ret <4 x double> %result
 }
 
-declare double @llvm.experimental.constrained.ldexp.f64.i16(double, i16, metadata, metadata) #1
-declare double @llvm.experimental.constrained.ldexp.f64.i32(double, i32, metadata, metadata) #1
-declare <2 x double> @llvm.experimental.constrained.ldexp.v2f64.v2i16(<2 x double>, <2 x i16>, metadata, metadata) #1
-declare <2 x double> @llvm.experimental.constrained.ldexp.v2f64.v2i32(<2 x double>, <2 x i32>, metadata, metadata) #1
-declare <3 x double> @llvm.experimental.constrained.ldexp.v3f64.v3i32(<3 x double>, <3 x i32>, metadata, metadata) #1
-declare <4 x double> @llvm.experimental.constrained.ldexp.v4f64.v4i32(<4 x double>, <4 x i32>, metadata, metadata) #1
-
 attributes #0 = { strictfp }
 attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll
index 3bf7fec81c041..9fd763049c5b0 100644
--- a/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll
+++ b/llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll
@@ -545,14 +545,4 @@ bb1:
   ret half %extract
 }
 
-declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #0
-declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #0
-declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #0
-declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata) #0
-
-declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata) #0
-declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata) #0
-declare <3 x half> @llvm.experimental.constrained.fptrunc.v3f16.v3f32(<3 x float>, metadata, metadata) #0
-declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata) #0
-
 attributes #0 = { strictfp }
diff --git a/llvm/test/CodeGen/AMDGPU/sub.i16.ll b/llvm/test/CodeGen/AMDGPU/sub.i16.ll
index 2b1577e832051..16f0557d5f31a 100644
--- a/llvm/test/CodeGen/AMDGPU/sub.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sub.i16.ll
@@ -162,8 +162,5 @@ define amdgpu_kernel void @v_test_sub_i16_constant_commute(ptr addrspace(1) %out
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.groupstaticsize() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/sub.ll b/llvm/test/CodeGen/AMDGPU/sub.ll
index ded308ae4f230..9ed105c192e82 100644
--- a/llvm/test/CodeGen/AMDGPU/sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/sub.ll
@@ -4,8 +4,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone speculatable
-
 define amdgpu_kernel void @s_sub_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
 ; GFX6-LABEL: s_sub_i32:
 ; GFX6:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
index 6ec213a06999b..0d4b1b2b96031 100644
--- a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -1031,7 +1031,5 @@ define amdgpu_kernel void @v_test_sub_v2i16_sext_to_v2i64(ptr addrspace(1) %out,
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/sub_i1.ll b/llvm/test/CodeGen/AMDGPU/sub_i1.ll
index a6ab1bd9e19f1..73bc7f94ac4aa 100644
--- a/llvm/test/CodeGen/AMDGPU/sub_i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/sub_i1.ll
@@ -48,5 +48,3 @@ endif:
   store i1 %3, ptr addrspace(1) %out
   ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
index 66c34414b6921..9537d401a2521 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
@@ -95,9 +95,5 @@ bb27:                                             ; preds = %bb24
   br label %bb14
 }
 
-
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
index 873567c3ab6f4..12a60fbdb6661 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
@@ -56,6 +56,4 @@ ife:
   ret void
 }
 
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
-
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-eliminate-dead.ll b/llvm/test/CodeGen/AMDGPU/subreg-eliminate-dead.ll
index f79ca18e7672d..830c098fec269 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-eliminate-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-eliminate-dead.ll
@@ -15,5 +15,3 @@ define amdgpu_kernel void @foobar() {
   store <2 x i32> %v15, ptr addrspace(1) undef, align 8
   ret void
 }
-
-declare double @llvm.fma.f64(double, double, double)
diff --git a/llvm/test/CodeGen/AMDGPU/swdev380865.ll b/llvm/test/CodeGen/AMDGPU/swdev380865.ll
index 9189cef019cf4..8aad8d2719853 100644
--- a/llvm/test/CodeGen/AMDGPU/swdev380865.ll
+++ b/llvm/test/CodeGen/AMDGPU/swdev380865.ll
@@ -79,8 +79,4 @@ for.cond.cleanup:                                 ; preds = %for.cond4.preheader
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workgroup.id.x() #0
-declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-
 attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll b/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll
index a5e14234c3d41..b813e9a0b5aa5 100644
--- a/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll
+++ b/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll
@@ -54,8 +54,6 @@ define void @test(i1 %c0) #1 {
   exit:                                             ; preds = %switch.blk
     ret void
 }
-
-declare i32 @llvm.amdgcn.workitem.id.x() #0
 declare i32 @func(i32)#0
 
 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/swizzle-export.ll b/llvm/test/CodeGen/AMDGPU/swizzle-export.ll
index 1adf549c76767..6dd5a446287e5 100644
--- a/llvm/test/CodeGen/AMDGPU/swizzle-export.ll
+++ b/llvm/test/CodeGen/AMDGPU/swizzle-export.ll
@@ -121,8 +121,5 @@ main_body:
 }
 
 ; Function Attrs: nounwind readonly
-declare float @llvm.cos.f32(float) #1
-
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
 
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/tail-duplication-convergent.ll b/llvm/test/CodeGen/AMDGPU/tail-duplication-convergent.ll
index 17ec27bc49db4..dcc919474d91e 100644
--- a/llvm/test/CodeGen/AMDGPU/tail-duplication-convergent.ll
+++ b/llvm/test/CodeGen/AMDGPU/tail-duplication-convergent.ll
@@ -5,10 +5,6 @@
 
 declare void @nonconvergent_func() #0
 declare void @convergent_func() #1
-declare void @llvm.amdgcn.s.barrier() #1
-declare void @llvm.amdgcn.ds.gws.init(i32, i32) #2
-declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #2
-declare void @llvm.amdgcn.ds.gws.sema.release.all(i32 %offset) #2
 
 ; barrier shouldn't be duplicated.
 
diff --git a/llvm/test/CodeGen/AMDGPU/target-cpu.ll b/llvm/test/CodeGen/AMDGPU/target-cpu.ll
index 3119c32dcbaef..8528eccde59af 100644
--- a/llvm/test/CodeGen/AMDGPU/target-cpu.ll
+++ b/llvm/test/CodeGen/AMDGPU/target-cpu.ll
@@ -1,14 +1,8 @@
 ; RUN: llc -mtriple=amdgcn -disable-promote-alloca-to-vector -verify-machineinstrs < %s | FileCheck %s
 
-declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #1
-
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 ; CI+ intrinsic
-declare void @llvm.amdgcn.s.dcache.inv.vol() #0
 
 ; VI+ intrinsic
-declare void @llvm.amdgcn.s.dcache.wb() #0
 
 ; CHECK-LABEL: {{^}}target_none:
 ; CHECK: s_movk_i32 [[OFFSETREG:s[0-9]+]], 0x400
diff --git a/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll b/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll
index 878d59a83cdd6..a7bc9225cfbf0 100644
--- a/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll
+++ b/llvm/test/CodeGen/AMDGPU/target-mem-intrinsic-metadata.ll
@@ -12,8 +12,6 @@ define amdgpu_kernel void @ds_append_noalias() {
   ret void
 }
 
-declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) nocapture, i1 immarg) #0
-
 attributes #0 = { argmemonly convergent nounwind willreturn }
 
 !0 = !{!1}
diff --git a/llvm/test/CodeGen/AMDGPU/tex-clause-antidep.ll b/llvm/test/CodeGen/AMDGPU/tex-clause-antidep.ll
index 6e906446ccc62..deab4f64aa7a0 100644
--- a/llvm/test/CodeGen/AMDGPU/tex-clause-antidep.ll
+++ b/llvm/test/CodeGen/AMDGPU/tex-clause-antidep.ll
@@ -18,6 +18,3 @@ define amdgpu_vs void @test(<4 x float> inreg %reg0) {
   call void @llvm.r600.store.swizzle(<4 x float> %11, i32 0, i32 0)
   ret void
 }
-
-declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/texture-input-merge.ll b/llvm/test/CodeGen/AMDGPU/texture-input-merge.ll
index 92aeb6bd428ef..1cdeaf7f7f05c 100644
--- a/llvm/test/CodeGen/AMDGPU/texture-input-merge.ll
+++ b/llvm/test/CodeGen/AMDGPU/texture-input-merge.ll
@@ -24,6 +24,3 @@ define amdgpu_vs void @test(<4 x float> inreg %reg0) {
   call void @llvm.r600.store.swizzle(<4 x float> %18, i32 0, i32 0)
   ret void
 }
-
-declare <4 x float> @llvm.r600.tex(<4 x float>, i32, i32, i32, i32, i32, i32, i32, i32, i32) readnone
-declare void @llvm.r600.store.swizzle(<4 x float>, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
index eef5f57beb07d..4628d0a62e345 100644
--- a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
+++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll
@@ -189,6 +189,4 @@ end:                                              ; preds = %kill0, %kill1, %flo
   ret { <4 x float> } undef
 }
 
-declare void @llvm.amdgcn.kill(i1) #0
-
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
index 3cd6c98ef4b8e..7b644c94abdd6 100644
--- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
@@ -4,9 +4,6 @@
 ; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900 %s
 ; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900 %s
 
-declare void @llvm.trap() #0
-declare void @llvm.debugtrap() #1
-
 define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
 ; NOHSA-TRAP-GFX900-LABEL: trap:
 ; NOHSA-TRAP-GFX900:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/trap.ll b/llvm/test/CodeGen/AMDGPU/trap.ll
index 2f687295af73e..1fb4623f523bc 100644
--- a/llvm/test/CodeGen/AMDGPU/trap.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap.ll
@@ -25,10 +25,6 @@
 
 ; GCN-WARNING: warning: <unknown>:0:0: in function hsa_debugtrap void (ptr addrspace(1)): debugtrap handler not supported
 
-
-declare void @llvm.trap() #0
-declare void @llvm.debugtrap() #1
-
 ; MESA-TRAP: .section .AMDGPU.config
 ; MESA-TRAP:  .long   47180
 ; MESA-TRAP-NEXT: .long   208
diff --git a/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll b/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll
index e5bc4c5721b90..a845b8f6e64c5 100644
--- a/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/trunc-cmp-constant.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=SI -check-prefix=FUNC %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 ; FUNC-LABEL: {{^}}sextload_i1_to_i32_trunc_cmp_eq_0:
 ; SI: buffer_load_ubyte [[LOAD:v[0-9]+]]
 ; SI: v_and_b32_e32 [[TMP:v[0-9]+]], 1, [[LOAD]]
diff --git a/llvm/test/CodeGen/AMDGPU/trunc.ll b/llvm/test/CodeGen/AMDGPU/trunc.ll
index a9cd0e997e0e5..39b5458748750 100644
--- a/llvm/test/CodeGen/AMDGPU/trunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/trunc.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI  %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=cypress < %s | FileCheck -enable-var-scope -check-prefix=EG %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 define amdgpu_kernel void @trunc_i64_to_i32_store(ptr addrspace(1) %out, [8 x i32], i64 %in) {
 ; GCN-LABEL: {{^}}trunc_i64_to_i32_store:
 ; GCN: s_load_dword [[SLOAD:s[0-9]+]], s[0:1],
diff --git a/llvm/test/CodeGen/AMDGPU/uaddo.ll b/llvm/test/CodeGen/AMDGPU/uaddo.ll
index 416dbb226422c..4518d1ab640bd 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddo.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddo.ll
@@ -876,13 +876,6 @@ define amdgpu_cs void @sv_uaddo_i128(ptr addrspace(1) %out, i128 inreg %a, i128
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) #1
-declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1
-declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #1
-declare { i128, i1 } @llvm.uadd.with.overflow.i128(i128, i128) #1
-declare { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
index b714fda6f1d07..2dd31443a38d7 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddsat.ll
@@ -614,17 +614,4 @@ define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
   ret i64 %result
 }
 
-declare i8 @llvm.uadd.sat.i8(i8, i8) #0
-declare i16 @llvm.uadd.sat.i16(i16, i16) #0
-declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>) #0
-declare i32 @llvm.uadd.sat.i32(i32, i32) #0
-declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.uadd.sat.v3i32(<3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) #0
-declare <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32>, <8 x i32>) #0
-declare <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32>, <16 x i32>) #0
-declare i64 @llvm.uadd.sat.i64(i64, i64) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
index ba52d702c7ed1..e17c96554d1af 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.f64.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope  -check-prefixes=GCN,SI %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 define amdgpu_kernel void @v_uint_to_fp_i64_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 ; SI-LABEL: v_uint_to_fp_i64_to_f64:
 ; SI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
index 79b0a966bc1fb..20e600ff30ea1 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
@@ -838,7 +838,5 @@ define amdgpu_kernel void @v_uint_to_fp_v4i64_to_v4f16(ptr addrspace(1) %out, pt
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll
index b3d589494b62f..2af82bedbd236 100644
--- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.ll
@@ -129,7 +129,5 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/umed3.ll b/llvm/test/CodeGen/AMDGPU/umed3.ll
index a2d99f1f8c2c2..29bf30d7481eb 100644
--- a/llvm/test/CodeGen/AMDGPU/umed3.ll
+++ b/llvm/test/CodeGen/AMDGPU/umed3.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 ; GCN-LABEL: {{^}}v_test_umed3_r_i_i_i32:
 ; GCN: v_med3_u32 v{{[0-9]+}}, v{{[0-9]+}}, 12, 17
 define amdgpu_kernel void @v_test_umed3_r_i_i_i32(ptr addrspace(1) %out, ptr addrspace(1) %aptr) #1 {
diff --git a/llvm/test/CodeGen/AMDGPU/undef-build-vector.ll b/llvm/test/CodeGen/AMDGPU/undef-build-vector.ll
index 1d00ee956e3a2..5ffb514f1548a 100644
--- a/llvm/test/CodeGen/AMDGPU/undef-build-vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/undef-build-vector.ll
@@ -12,6 +12,3 @@ entry:
   store <16 x i16> %i1, ptr addrspace(1) null, align 32
   ret void
 }
-
-declare <2 x i16> @llvm.vector.extract.v2i16.v16i16(<16 x i16>, i64 immarg)
-declare <16 x i16> @llvm.vector.insert.v16i16.v2i16(<16 x i16>, <2 x i16>, i64 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
index 0acee5bd5ac19..72cc518ba343d 100644
--- a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
+++ b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
@@ -123,7 +123,5 @@ define amdgpu_kernel void @partially_undef_copy() #0 {
   ret void
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll b/llvm/test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll
index 7417f865b5173..464ae0b0ac8dd 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-branch-intrinsic-cond.ll
@@ -22,6 +22,4 @@ else:
   ret float %r
 }
 
-declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind memory(argmem: read) }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
index f60a274f1e592..62a0539ad6567 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -1198,8 +1198,6 @@ bb3:                                              ; preds = %bb2, %bb1
   br label %bb1
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GCN: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-crash.ll b/llvm/test/CodeGen/AMDGPU/uniform-crash.ll
index 426c83d001498..d91394a2f5da8 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-crash.ll
@@ -52,6 +52,5 @@ bb12:                                             ; preds = %bb11, %bb5
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.y() #1
 
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-load-from-tid.ll b/llvm/test/CodeGen/AMDGPU/uniform-load-from-tid.ll
index c3dcc78a20b31..3119548761474 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-load-from-tid.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-load-from-tid.ll
@@ -151,8 +151,6 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 !0 = !{i32 64, i32 1, i32 1}
 !1 = !{i32 65, i32 2, i32 1}
 !2 = !{i32 64, i32 1, i32 2}
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
index 3597d9a7010d3..69537e005803a 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-loop-inside-nonuniform.ll
@@ -61,6 +61,4 @@ done1:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #1 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll b/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
index fd0c74a192d38..7fa0046eb4180 100644
--- a/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
@@ -97,13 +97,6 @@ ENDIF28:                                          ; preds = %LOOP
   br label %LOOP
 }
 
-declare float @llvm.minnum.f32(float, float) #1
-declare float @llvm.maxnum.f32(float, float) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #2
-
 attributes #0 = { nounwind "InitialPSInputAddr"="36983" "target-cpu"="tonga" }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
index a5e1506114f2d..8121cc8e912ed 100644
--- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
+++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
@@ -518,8 +518,6 @@ bb18:                                             ; preds = %bb17, %bb10
   br label %bb2
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 declare hidden float @spam()
 
 !llvm.module.flags = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-atomics.ll b/llvm/test/CodeGen/AMDGPU/unsupported-atomics.ll
index 4b84f9175307f..9e7a63948381c 100644
--- a/llvm/test/CodeGen/AMDGPU/unsupported-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-atomics.ll
@@ -25,4 +25,3 @@ define fastcc void @fadd_test(ptr addrspace(1) nocapture noundef %0, float nound
   %3 = tail call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) noundef %0, float noundef %1)
   ret void
 }
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float)
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll
index 95fb8a802277f..2fb44ead7d06d 100644
--- a/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll
@@ -14,6 +14,4 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 immarg, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll
index 57ba4111b353b..f705a4aae063c 100644
--- a/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-g16.ll
@@ -16,6 +16,4 @@ main_body:
   ret <4 x float> %v
 }
 
-declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #0
-
 attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
index b08586efe2f21..b97a07cf239f3 100644
--- a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
@@ -26,5 +26,3 @@ define amdgpu_kernel void @image_sample_test(ptr addrspace(1) %out, float %arg1,
   store <4 x float> %result, ptr addrspace(1) %out
   ret void
 }
-
-declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
index a53532f53e020..2f8f62bd1bcf1 100644
--- a/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
+++ b/llvm/test/CodeGen/AMDGPU/use-sgpr-multiple-times.ll
@@ -1,11 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
 
-declare float @llvm.fma.f32(float, float, float) #1
-declare double @llvm.fma.f64(double, double, double) #1
-declare float @llvm.fmuladd.f32(float, float, float) #1
-declare float @llvm.amdgcn.div.fixup.f32(float, float, float) #1
-
 
 ; GCN-LABEL: {{^}}test_sgpr_use_twice_binop:
 ; GCN: s_load_dword [[SGPR:s[0-9]+]],
diff --git a/llvm/test/CodeGen/AMDGPU/usubo.ll b/llvm/test/CodeGen/AMDGPU/usubo.ll
index 666ae7c126ae3..b94f9cb9f02f8 100644
--- a/llvm/test/CodeGen/AMDGPU/usubo.ll
+++ b/llvm/test/CodeGen/AMDGPU/usubo.ll
@@ -809,11 +809,5 @@ exit:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare { i16, i1 } @llvm.usub.with.overflow.i16(i16, i16) #1
-declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1
-declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) #1
-declare { <2 x i32>, <2 x i1> } @llvm.usub.with.overflow.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/usubsat.ll b/llvm/test/CodeGen/AMDGPU/usubsat.ll
index 8cc7025d671c4..0369b62e8e7a3 100644
--- a/llvm/test/CodeGen/AMDGPU/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/usubsat.ll
@@ -705,17 +705,4 @@ define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) {
   ret i64 %result
 }
 
-declare i8 @llvm.usub.sat.i8(i8, i8) #0
-declare i16 @llvm.usub.sat.i16(i16, i16) #0
-declare <2 x i16> @llvm.usub.sat.v2i16(<2 x i16>, <2 x i16>) #0
-declare <3 x i16> @llvm.usub.sat.v3i16(<3 x i16>, <3 x i16>) #0
-declare <4 x i16> @llvm.usub.sat.v4i16(<4 x i16>, <4 x i16>) #0
-declare i32 @llvm.usub.sat.i32(i32, i32) #0
-declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>) #0
-declare <3 x i32> @llvm.usub.sat.v3i32(<3 x i32>, <3 x i32>) #0
-declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) #0
-declare <8 x i32> @llvm.usub.sat.v8i32(<8 x i32>, <8 x i32>) #0
-declare <16 x i32> @llvm.usub.sat.v16i32(<16 x i32>, <16 x i32>) #0
-declare i64 @llvm.usub.sat.i64(i64, i64) #0
-
 attributes #0 = { nounwind readnone speculatable willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/v1024.ll b/llvm/test/CodeGen/AMDGPU/v1024.ll
index fc02ffbf7cb1c..02ca4d6f9c6eb 100644
--- a/llvm/test/CodeGen/AMDGPU/v1024.ll
+++ b/llvm/test/CodeGen/AMDGPU/v1024.ll
@@ -23,8 +23,3 @@ if.then.i62.i:                                    ; preds = %if.else.i, %if.then
   call void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) align 4 undef, ptr addrspace(5) align 16 %alloca, i64 128, i1 false)
   ret void
 }
-
-declare void @llvm.memset.p5.i32(ptr addrspace(5) nocapture readonly, i8, i32, i1 immarg)
-declare void @llvm.memcpy.p5.p5.i64(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg)
-
-declare void @llvm.memcpy.p1.p5.i64(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i64, i1 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/v_add_u64_pseudo_sdwa.ll b/llvm/test/CodeGen/AMDGPU/v_add_u64_pseudo_sdwa.ll
index ca4d689156b49..989f4e2478a82 100644
--- a/llvm/test/CodeGen/AMDGPU/v_add_u64_pseudo_sdwa.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_add_u64_pseudo_sdwa.ll
@@ -48,6 +48,3 @@ bb:
   store i64 %tmp9, ptr addrspace(1) %tmp7, align 8
   ret void
 }
-
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll b/llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll
index 2fa9750653b6d..88e2a4cfe538d 100644
--- a/llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_cmp_gfx11.ll
@@ -63,10 +63,4 @@ define amdgpu_kernel void @ballot_test(half %x, half %y) {
   ret void
 }
 
-declare i64 @llvm.amdgcn.icmp.i64.i16(i16, i16, i32 immarg) #0
-
-declare i64 @llvm.amdgcn.fcmp.i64.f16(half, half, i32 immarg) #0
-
-declare i64 @llvm.amdgcn.ballot.i64(i1) #0
-
 attributes #0 = { convergent nounwind readnone willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
index fc6df735c05b0..11b3e8e17170f 100644
--- a/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_cndmask.ll
@@ -4,11 +4,6 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX10 %s
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-declare half @llvm.fabs.f16(half)
-declare float @llvm.fabs.f32(float)
-declare double @llvm.fabs.f64(double)
-
 ; All nan values are converted to 0xffffffff
 define amdgpu_kernel void @v_cnd_nan_nosgpr(ptr addrspace(1) %out, i32 %c, ptr addrspace(1) %fptr) #0 {
 ; SI-LABEL: v_cnd_nan_nosgpr:
diff --git a/llvm/test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll b/llvm/test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll
index 5a4d0792da203..f34e2c9d04c92 100644
--- a/llvm/test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_cvt_pk_u8_f32.ll
@@ -1,8 +1,6 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 ; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
-declare i32 @llvm.amdgcn.cvt.pk.u8.f32(float, i32, i32) #0
-
 ; GCN-LABEL: {{^}}v_cvt_pk_u8_f32_idx_0:
 ; GCN: v_cvt_pk_u8_f32 v{{[0-9]+}}, s{{[0-9]+}}, 0, v{{[0-9]+}}
 define amdgpu_kernel void @v_cvt_pk_u8_f32_idx_0(ptr addrspace(1) %out, float %src, i32 %reg) {
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll
index 7fe33d5bd5f72..0e4354bf215fa 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mac.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll
@@ -307,8 +307,6 @@ define float @v_mac_f32_dynamic_ftz(float %a, float %b, float %c) "denormal-fp-m
   ret float %mad
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
 attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" }
 attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" }
 attributes #2 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
index d7a837a6e5f71..b9c3454f92c16 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -675,8 +675,6 @@ entry:
   ret void
 }
 
-declare void @llvm.amdgcn.s.barrier() #2
-
 attributes #0 = { nounwind "no-signed-zeros-fp-math"="false" "denormal-fp-math"="preserve-sign,preserve-sign" }
 attributes #1 = { nounwind "no-signed-zeros-fp-math"="true" "denormal-fp-math"="preserve-sign,preserve-sign" }
 attributes #2 = { nounwind convergent }
diff --git a/llvm/test/CodeGen/AMDGPU/v_pack.ll b/llvm/test/CodeGen/AMDGPU/v_pack.ll
index 8bc8fbd0e0e84..02d20946503cd 100644
--- a/llvm/test/CodeGen/AMDGPU/v_pack.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_pack.ll
@@ -2,8 +2,6 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 ; RUN: llc -global-isel -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GISEL %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 define amdgpu_kernel void @v_pack_b32_v2f16(ptr addrspace(1) %in0, ptr addrspace(1) %in1) #0 {
 ; GCN-LABEL: v_pack_b32_v2f16:
 ; GCN:       ; %bb.0:
@@ -246,8 +244,6 @@ define amdgpu_kernel void @v_pack_b32.fneg(ptr addrspace(1) %in0, ptr addrspace(
   ret void
 }
 
-declare half @llvm.fabs.f16(half) #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 
diff --git a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
index 89fef7eead839..eee6103dbaaa6 100644
--- a/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_sat_pk_u8_i16.ll
@@ -9,12 +9,6 @@
 
 ; <GFX9 has no V_SAT_PK, GFX9+ has V_SAT_PK, GFX11 has V_SAT_PK with t16
 
-declare i16 @llvm.smin.i16(i16, i16)
-declare i16 @llvm.smax.i16(i16, i16)
-
-declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)
-declare <2 x i16> @llvm.smax.v2i16(<2 x i16>, <2 x i16>)
-
 define <2 x i16> @basic_smax_smin(i16 %src0, i16 %src1) {
 ; SDAG-VI-LABEL: basic_smax_smin:
 ; SDAG-VI:       ; %bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/v_sub_u64_pseudo_sdwa.ll b/llvm/test/CodeGen/AMDGPU/v_sub_u64_pseudo_sdwa.ll
index d5347f829002d..8effcb80413f6 100644
--- a/llvm/test/CodeGen/AMDGPU/v_sub_u64_pseudo_sdwa.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_sub_u64_pseudo_sdwa.ll
@@ -48,6 +48,3 @@ bb:
   store i64 %tmp9, ptr addrspace(1) %tmp7, align 8
   ret void
 }
-
-
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
index 9a64a6d99f46f..c73bf99a37ce1 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -verify-machineinstrs -enable-misched -asm-verbose -disable-block-placement -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=SI %s
 
-declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-
 ; SI-LABEL: {{^}}test_if:
 ; Make sure the i1 values created by the cfg structurizer pass are
 ; moved using VALU instructions
diff --git a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.ll b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.ll
index 2c66d38a1be62..78b79f3c8752d 100644
--- a/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.ll
+++ b/llvm/test/CodeGen/AMDGPU/vcmp-saveexec-to-vcmpx.ll
@@ -161,5 +161,3 @@ after:
 end:
   ret i32 %a
 }
-
-declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
diff --git a/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
index 769db215cf034..00f04a3c517be 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
@@ -419,9 +419,3 @@ define amdgpu_kernel void @vector_bitcast_to_struct_from_alloca_array(ptr addrsp
   store %struct.v4 %load, ptr addrspace(1) %out
   ret void
 }
-
-declare void @llvm.lifetime.start.p5(i64 immarg, ptr addrspace(5) nocapture)
-
-declare void @llvm.lifetime.end.p5(i64 immarg, ptr addrspace(5) nocapture)
-
-declare void @llvm.assume(i1)
diff --git a/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll b/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll
index eb88c790dfe72..754e7506f4868 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-extract-insert.ll
@@ -4,8 +4,6 @@
 ; Test that when extracting the same unknown vector index from an
 ; insertelement the dynamic indexing is folded away.
 
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 ; No dynamic indexing required
 define amdgpu_kernel void @extract_insert_same_dynelt_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %val, i32 %idx) #1 {
 ; GCN-LABEL: extract_insert_same_dynelt_v4i32:
diff --git a/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll b/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll
index e2ba9d0c087ad..b91afef4b0525 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-legalizer-divergence.ll
@@ -3,8 +3,6 @@
 ; Tests for a bug in SelectionDAG::UpdateNodeOperands exposed by VectorLegalizer
 ; where divergence information is not updated.
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 define amdgpu_kernel void @spam(ptr addrspace(1) noalias %arg) {
   %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
   %tmp1 = zext i32 %tmp to i64
diff --git a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll
index 66c49ba8b734d..330ddf4f58f22 100644
--- a/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector_shuffle.packed.ll
@@ -4879,8 +4879,4 @@ define void @shuffle_v32bf16_concat(ptr addrspace(1) %arg0, ptr addrspace(1) %ar
   ret void
 }
 
-declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
-declare <2 x bfloat> @llvm.fma.v2bf16(<2 x bfloat>, <2 x bfloat>, <2 x bfloat>) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-
 attributes #0 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/vectorize-loads.ll b/llvm/test/CodeGen/AMDGPU/vectorize-loads.ll
index 5abaf06141d22..7951e19a0c2c6 100644
--- a/llvm/test/CodeGen/AMDGPU/vectorize-loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/vectorize-loads.ll
@@ -47,6 +47,4 @@ entry:
   ret void
 }
 
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-
 !0 = !{!0}
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
index 0211c5111c31d..17fed199aaea4 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll
@@ -80,6 +80,4 @@ bb0:
   br label %bb0
 }
 
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg) #0
-
 attributes #0 = { nounwind memory(argmem: write) }
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
index d2364a61ed686..1d92e5cd2b7cb 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll
@@ -668,10 +668,6 @@ if.then:                                          ; preds = %entry
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
-declare float @llvm.amdgcn.image.sample.2d.f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable willreturn }
 attributes #2 = { nounwind readonly willreturn }
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
index 65a7554bb66a5..0e66e1ff0db3b 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
@@ -610,7 +610,5 @@ bb145:                                            ; preds = %bb12
   br label %bb12
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
index 0cabfa9aea0e4..43764c5905ad4 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
@@ -483,12 +483,6 @@ bb157:                                            ; preds = %bb24
   br label %bb24
 }
 
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #2
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
 attributes #2 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
index 20dc5ad5c8665..ea62b0a314463 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-placement-issue61083.ll
@@ -97,11 +97,6 @@ bb202:                                            ; preds = %bb194, %bb170, %bb9
 }
 
 declare hidden void @__keep_alive()
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare align 4 ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-declare void @llvm.assume(i1 noundef)
-declare void @llvm.amdgcn.s.barrier()
-declare void @llvm.trap()
 
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
index 2b96e10fd3cc3..05b3bd4928fc3 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
@@ -360,8 +360,6 @@ main_body:
   ret <4 x float> %v1
 }
 
-declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
-
 attributes #0 = { nounwind writeonly }
 attributes #1 = { nounwind readonly }
 attributes #2 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
diff --git a/llvm/test/CodeGen/AMDGPU/vi-removed-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/vi-removed-intrinsics.ll
index f99c3f8306ac1..3fa46f3c38fe9 100644
--- a/llvm/test/CodeGen/AMDGPU/vi-removed-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/vi-removed-intrinsics.ll
@@ -2,8 +2,6 @@
 
 ; ERROR: error: foo.cl:1:42: in function rsq_legacy_f32 void (ptr addrspace(1), float): intrinsic not supported on subtarget
 
-declare float @llvm.amdgcn.rsq.legacy(float) #0
-
 define amdgpu_kernel void @rsq_legacy_f32(ptr addrspace(1) %out, float %src) #1 {
   %rsq = call float @llvm.amdgcn.rsq.legacy(float %src), !dbg !4
   store float %rsq, ptr addrspace(1) %out, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
index f78b408d78255..1c068a07fc58d 100644
--- a/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
+++ b/llvm/test/CodeGen/AMDGPU/vni8-across-blocks.ll
@@ -1982,5 +1982,3 @@ bb.2:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-
diff --git a/llvm/test/CodeGen/AMDGPU/vop-shrink.ll b/llvm/test/CodeGen/AMDGPU/vop-shrink.ll
index bfa106eb19f92..7ad81cd946304 100644
--- a/llvm/test/CodeGen/AMDGPU/vop-shrink.ll
+++ b/llvm/test/CodeGen/AMDGPU/vop-shrink.ll
@@ -45,7 +45,6 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #0
 
 attributes #0 = { nounwind readnone }
 attributes #1 = { readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.ll b/llvm/test/CodeGen/AMDGPU/vopc_dpp.ll
index a6dcbb5bbd695..d2763a71b89c9 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.ll
@@ -10,6 +10,3 @@ define amdgpu_cs void @_amdgpu_cs_main(i32 %0) {
   call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %spec.select.3, ptr addrspace(8) zeroinitializer, i32 0, i32 0, i32 0)
   ret void
 }
-
-declare i32 @llvm.amdgcn.mov.dpp.i32(i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/wait.ll b/llvm/test/CodeGen/AMDGPU/wait.ll
index 8d88a1159e4b9..4bfa127823a60 100644
--- a/llvm/test/CodeGen/AMDGPU/wait.ll
+++ b/llvm/test/CodeGen/AMDGPU/wait.ll
@@ -73,10 +73,6 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.s.barrier() #1
-declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
 attributes #0 = { nounwind }
 attributes #1 = { convergent nounwind }
 attributes #2 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll
index b32ce6eb0acc0..45522920ccc9d 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-looptest.ll
@@ -129,16 +129,12 @@ bb18:                                             ; preds = %bb18, %bb
 }
 
 ; Function Attrs: nounwind readnone speculatable
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #1
 
 ; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 ; Function Attrs: nounwind readnone speculatable
-declare i32 @llvm.amdgcn.workgroup.id.x() #1
 
 ; Function Attrs: nounwind readnone speculatable
-declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #1
 
 attributes #0 = { "target-cpu"="fiji" "target-features"="-flat-for-global" }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll
index cfb2d66df8a71..921ca73d477bc 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll
@@ -254,6 +254,3 @@ define void @store_vscnt_flat(ptr %p) {
 define void @function_prologue() {
   ret void
 }
-
-declare void @llvm.amdgcn.s.barrier()
-declare i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/waterfall_kills_scc.ll b/llvm/test/CodeGen/AMDGPU/waterfall_kills_scc.ll
index 1afe5cdea8723..68b1b9c52c80f 100644
--- a/llvm/test/CodeGen/AMDGPU/waterfall_kills_scc.ll
+++ b/llvm/test/CodeGen/AMDGPU/waterfall_kills_scc.ll
@@ -1,7 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
 ; RUN:  llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX906 %s
-declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32 immarg) #0
-declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #1
 
 ; Check that the compiler doesn't crash with a "undefined physical register" error;
 ; bb.0 sets SCC bit in s_cmp_eq_u32 s0, 1
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 82816b4564e86..e26d5d785112b 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -2918,37 +2918,6 @@ define void @callee_no_stack_with_call() #1 {
   ret void
 }
 
-
-declare i32 @llvm.amdgcn.workitem.id.x()
-declare float @llvm.fabs.f32(float)
-declare { float, i1 } @llvm.amdgcn.div.scale.f32(float, float, i1)
-declare { double, i1 } @llvm.amdgcn.div.scale.f64(double, double, i1)
-declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1)
-declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1)
-declare i1 @llvm.amdgcn.class.f32(float, i32)
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32)
-declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64)
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
-declare float @llvm.amdgcn.strict.wwm.f32(float)
-declare float @llvm.amdgcn.wwm.f32(float)
-declare i32 @llvm.amdgcn.wqm.i32(i32)
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32)
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32)
-declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32 immarg)
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32)
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32)
-declare i64 @llvm.amdgcn.fcmp.i64.f32(float, float, i32)
-declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32)
-declare i32 @llvm.amdgcn.fcmp.i32.f32(float, float, i32)
-declare i32 @llvm.amdgcn.icmp.i32.i32(i32, i32, i32)
-declare void @llvm.amdgcn.kill(i1)
-declare i1 @llvm.amdgcn.wqm.vote(i1)
-declare i1 @llvm.amdgcn.ps.live()
-declare i64 @llvm.cttz.i64(i64, i1)
-declare i32 @llvm.cttz.i32(i32, i1)
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #5
-
 attributes #0 = { nounwind readnone speculatable }
 attributes #1 = { nounwind }
 attributes #2 = { nounwind readnone optnone noinline }
diff --git a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
index e0b320aa4f372..07d72d605aca6 100644
--- a/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/widen-smrd-loads.ll
@@ -701,6 +701,4 @@ define amdgpu_kernel void @widen_i16_global_invariant_load(ptr addrspace(1) %arg
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x()
-
 !0 = !{}
diff --git a/llvm/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll b/llvm/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll
index 24c1875159f67..73158e5fab7c0 100644
--- a/llvm/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll
+++ b/llvm/test/CodeGen/AMDGPU/widen_extending_scalar_loads.ll
@@ -1,8 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare -amdgpu-codegenprepare-widen-constant-loads < %s | FileCheck -check-prefix=OPT %s
 
-declare ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0
-
 define amdgpu_kernel void @constant_load_i1(ptr addrspace(1) %out, ptr addrspace(4) %in) #0 {
 ; OPT-LABEL: @constant_load_i1(
 ; OPT-NEXT:    [[VAL:%.*]] = load i1, ptr addrspace(4) [[IN:%.*]], align 1
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
index cb3d76cd9c0ba..e97b720b9c6ec 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-f16-f32-matrix-modifiers.ll
@@ -483,17 +483,3 @@ bb:
   store <8 x half> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
-declare <8 x float> @llvm.fabs.v8f32(<8 x float>)
-declare float @llvm.fabs.f32(float)
-
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half>, <8 x half>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16>, <8 x i16>, <8 x float>)
-declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half>, <8 x half>, <8 x half>, i1 immarg)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i16(<8 x half>, <16 x half>, <8 x float>, i16)
-declare <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i16(<8 x half>, <16 x half>, <8 x half>, i16)
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll
index a3973970d50d9..242d3e56bd930 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-imm.ll
@@ -410,15 +410,3 @@ bb:
   store <8 x i32> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half>, <8 x half>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16>, <8 x i16>, <8 x float>)
-declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half>, <8 x half>, <8 x half>, i1 immarg)
-declare <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16(<8 x i16>, <8 x i16>, <8 x i16>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <8 x i32>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 immarg, i32, i1 immarg, i32, <8 x i32>, i1 immarg)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <8 x i32>, i1 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll
index dbb4db05a35c5..94e9c54dabb97 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-iu-modifiers.ll
@@ -300,10 +300,3 @@ bb:
   store <8 x i32> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <8 x i32>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 immarg, i32, i1 immarg, i32, <8 x i32>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <8 x i32>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i16(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i16 %Index, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <8 x i32>, i16 %Index, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i32 %Index, i1 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll
index 009288dbdf530..f6aeb97bba328 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32-swmmac-index_key.ll
@@ -308,14 +308,3 @@ bb:
   store <8 x float> %res1, ptr addrspace(1) %out1
   ret void
 }
-
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i16(<8 x half>, <16 x half>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.i16(<8 x i16>, <16 x i16>, <8 x float>, i16)
-declare <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16.i16(<8 x half>, <16 x half>, <8 x half>, i16)
-declare <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.i16(<8 x i16>, <16 x i16>, <8 x i16>, i16)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i16(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i16 %Index, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <8 x i32>, i16 %Index, i1 immarg)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll
index 1012287838f12..8e813a2724ee6 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w32.ll
@@ -345,26 +345,3 @@ bb:
   store <8 x float> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v8f16(<8 x half>, <8 x half>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v8i16(<8 x i16>, <8 x i16>, <8 x float>)
-declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<8 x half>, <8 x half>, <8 x half>, i1 immarg)
-declare <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v8i16(<8 x i16>, <8 x i16>, <8 x i16>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v2i32.(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <8 x i32>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.i32(i1 immarg, i32, i1 immarg, i32, <8 x i32>, i1 immarg)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v8f32.v2i32(<2 x i32>, <2 x i32>, <8 x float>)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v8i32.v2i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <8 x i32>, i1 immarg)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v8f32.v8f16.v16f16.i16(<8 x half>, <16 x half>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v8f32.v8i16.v16i16.i16(<8 x i16>, <16 x i16>, <8 x float>, i16)
-declare <8 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v8f16.v8f16.v16f16..i16(<8 x half>, <16 x half>, <8 x half>, i16)
-declare <8 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v8i16.v8i16.v16i16.i16(<8 x i16>, <16 x i16>, <8 x i16>, i16)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v8i32.v2i32.v4i32.i16(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i16 %Index, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v8i32.i32.v2i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <8 x i32>, i16 %Index, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v8i32.v2i32.v4i32.i32(i1 immarg, <2 x i32>, i1 immarg, <4 x i32>, <8 x i32>, i32 %Index, i1 immarg)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
-declare <8 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v8f32.v2i32.v4i32.i16(<2 x i32>, <4 x i32>, <8 x float>, i16)
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
index ab1121a705529..3e09f5f9c00a5 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-f16-f32-matrix-modifiers.ll
@@ -440,17 +440,3 @@ bb:
   store <4 x half> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
-declare float @llvm.fabs.f32(float)
-
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half>, <4 x half>, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16(<4 x i16>, <4 x i16>, <4 x float>)
-declare <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v8f16(<4 x half>, <4 x half>, <4 x half>, i1 immarg)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i16(<4 x half>, <8 x half>, <4 x float>, i16)
-declare <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i16(<4 x half>, <8 x half>, <4 x half>, i16)
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll
index 829d77018ff96..2bed41b74ceac 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-imm.ll
@@ -362,15 +362,3 @@ bb:
   store <4 x i32> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half>, <4 x half>, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16(<4 x i16>, <4 x i16>, <4 x float>)
-declare <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16(<4 x half>, <4 x half>, <4 x half>, i1 immarg)
-declare <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16(<4 x i16>, <4 x i16>, <4 x i16>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32, i32, <4 x float>)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll
index 161d222d10ff7..134ee5d2311d9 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-iu-modifiers.ll
@@ -260,10 +260,3 @@ bb:
   store <4 x i32> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i8(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i8 %Index, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i16(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i16 %Index, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i16 %Index, i1 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll
index 511a116a78be5..7e8cacd6a9833 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64-swmmac-index_key.ll
@@ -458,15 +458,3 @@ bb:
   store <4 x float> %res3, ptr addrspace(1) %out3
   ret void
 }
-
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i8(<4 x half>, <8 x half>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.i8(<4 x i16>, <8 x i16>, <4 x float>, i8)
-declare <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i8(<4 x half>, <8 x half>, <4 x half>, i8)
-declare <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.i8(<4 x i16>, <8 x i16>, <4 x i16>, i8)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i8(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i8 %Index, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i16(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i16 %Index, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i16 %Index, i1 immarg)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.i8(i32, <2 x i32>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.i8(i32, <2 x i32>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.i8(i32, <2 x i32>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.i8(i32, <2 x i32>, <4 x float>, i8)
diff --git a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll
index 5fde11cb4b1b1..3e68d3a63e648 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma-gfx12-w64.ll
@@ -308,26 +308,3 @@ bb:
   store <4 x float> %res, ptr addrspace(1) %out
   ret void
 }
-
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v4f16(<4 x half>, <4 x half>, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v4i16(<4 x i16>, <4 x i16>, <4 x float>)
-declare <4 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v4f16.v4f16(<4 x half>, <4 x half>, <4 x half>, i1 immarg)
-declare <4 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v4i16.v4i16(<4 x i16>, <4 x i16>, <4 x i16>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.fp8.v4f32.i32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.fp8.bf8.v4f32.i32(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.fp8.v4f32.i32.(i32, i32, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf8.bf8.v4f32.i32(i32, i32, <4 x float>)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x32.iu4.v4i32.i32(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i1 immarg)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.f16.v4f32.v4f16.v8f16.i8(<4 x half>, <8 x half>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf16.v4f32.v4i16.v8i16.i8(<4 x i16>, <8 x i16>, <4 x float>, i8)
-declare <4 x half> @llvm.amdgcn.swmmac.f16.16x16x32.f16.v4f16.v4f16.v8f16.i8(<4 x half>, <8 x half>, <4 x half>, i8)
-declare <4 x i16> @llvm.amdgcn.swmmac.bf16.16x16x32.bf16.v4i16.v4i16.v8i16.i8(<4 x i16>, <8 x i16>, <4 x i16>, i8)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu8.v4i32.i32.v2i32.i8(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i8 %Index, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x32.iu4.v4i32.i32.i32.i16(i1 immarg, i32, i1 immarg, i32, <4 x i32>, i16 %Index, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.swmmac.i32.16x16x64.iu4.v4i32.i32.v2i32.i16(i1 immarg, i32, i1 immarg, <2 x i32>, <4 x i32>, i16 %Index, i1 immarg)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.fp8.v4f32.i32.v2i32.i8(i32, <2 x i32>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.fp8.bf8.v4f32.i32.v2i32.i8(i32, <2 x i32>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.fp8.v4f32.i32.v2i32.i8(i32, <2 x i32>, <4 x float>, i8)
-declare <4 x float> @llvm.amdgcn.swmmac.f32.16x16x32.bf8.bf8.v4f32.i32.v2i32.i8(i32, <2 x i32>, <4 x float>, i8)
diff --git a/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll b/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll
index 801bb06aacb63..05cfdfa298301 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma_modifiers.ll
@@ -1,7 +1,5 @@
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s
 
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half>, <16 x half>, <4 x float>)
-
 ; Make sure we don't crash when trying to select modifiers in SelectVOP3PMods.
 
 define amdgpu_cs void @xyz () {
diff --git a/llvm/test/CodeGen/AMDGPU/wmma_multiple_32.ll b/llvm/test/CodeGen/AMDGPU/wmma_multiple_32.ll
index 12b65986cf1c7..d7629530db9a1 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma_multiple_32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma_multiple_32.ll
@@ -1,13 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=W32
 
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v8f32.v16f16(<16 x half>, <16 x half> , <8 x float>)
-declare <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v8f32.v16i16(<16 x i16>, <16 x i16> , <8 x float>)
-declare <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v16f16.v16f16(<16 x half>, <16 x half> , <16 x half>, i1 immarg)
-declare <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v16i16.v16i16(<16 x i16>, <16 x i16> , <16 x i16>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v8i32.v4i32(i1 immarg, <4 x i32>, i1 immarg, <4 x i32> , <8 x i32>, i1 immarg)
-declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v8i32.v2i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32> , <8 x i32>, i1 immarg)
-
 ; The tests demonstrate that the following WMMA register constraints are satisfied.
 ;
 ; v_wmma D, A, B, C
diff --git a/llvm/test/CodeGen/AMDGPU/wmma_multiple_64.ll b/llvm/test/CodeGen/AMDGPU/wmma_multiple_64.ll
index b43969dd4f5c0..ca1b6b0e96d3e 100644
--- a/llvm/test/CodeGen/AMDGPU/wmma_multiple_64.ll
+++ b/llvm/test/CodeGen/AMDGPU/wmma_multiple_64.ll
@@ -1,13 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck %s --check-prefix=W64
 
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16.v4f32.v16f16(<16 x half>, <16 x half>, <4 x float>)
-declare <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16.v4f32.v16i16(<16 x i16>, <16 x i16>, <4 x float>)
-declare <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16.v8f16.v16f16(<16 x half>, <16 x half>, <8 x half>, i1 immarg)
-declare <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16.v8i16.v16i16(<16 x i16>, <16 x i16>, <8 x i16>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8.v4i32.v4i32(i1 immarg, <4 x i32>, i1 immarg, <4 x i32>, <4 x i32>, i1 immarg)
-declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4.v4i32.v2i32(i1 immarg, <2 x i32>, i1 immarg, <2 x i32>, <4 x i32>, i1 immarg)
-
 ; The tests demonstrate that the following WMMA register constraints are satisfied.
 ;
 ; v_wmma D, A, B, C
diff --git a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
index 40e4692a18ec7..a81299c1b9ce9 100644
--- a/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/workgroup-id-in-arch-sgprs.ll
@@ -106,10 +106,6 @@ define amdgpu_kernel void @workgroup_id_xyz(ptr addrspace(1) %ptrx, ptr addrspac
 
   ret void
 }
-
-declare i32 @llvm.amdgcn.workgroup.id.x()
-declare i32 @llvm.amdgcn.workgroup.id.y()
-declare i32 @llvm.amdgcn.workgroup.id.z()
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX12-GISEL: {{.*}}
 ; GFX12-SDAG: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/wqm-gfx11.ll b/llvm/test/CodeGen/AMDGPU/wqm-gfx11.ll
index d7ce562292b86..36ba638f126e9 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm-gfx11.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm-gfx11.ll
@@ -58,5 +58,3 @@ main_body:
 }
 
 attributes #1 = { nounwind readnone speculatable willreturn }
-declare float @llvm.amdgcn.lds.param.load(i32 immarg, i32 immarg, i32) #1
-declare float @llvm.amdgcn.lds.direct.load(i32) #1
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index 95dfb12c8dbae..5afb762ae06f8 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -3312,46 +3312,6 @@ main_body:
   ret void
 }
 
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
-declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #1
-
-declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32 immarg) #2
-declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #3
-declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #3
-
-declare void @llvm.amdgcn.struct.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.struct.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32 immarg) #2
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32 immarg) #2
-declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #3
-declare float @llvm.amdgcn.struct.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32, i32) #3
-
-declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #3
-declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
-declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
-declare float @llvm.amdgcn.image.sample.1d.f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #3
-declare void @llvm.amdgcn.kill(i1) #1
-declare float @llvm.amdgcn.wqm.f32(float) #3
-declare i32 @llvm.amdgcn.wqm.i32(i32) #3
-declare float @llvm.amdgcn.strict.wwm.f32(float) #3
-declare i32 @llvm.amdgcn.strict.wwm.i32(i32) #3
-declare float @llvm.amdgcn.wwm.f32(float) #3
-declare i32 @llvm.amdgcn.wwm.i32(i32) #3
-declare float @llvm.amdgcn.strict.wqm.f32(float) #3
-declare i32 @llvm.amdgcn.strict.wqm.i32(i32) #3
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #4
-declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #3
-declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #3
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #3
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #1
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
-declare i32 @llvm.amdgcn.ds.swizzle(i32, i32)
-declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg) #7
-
 attributes #1 = { nounwind }
 attributes #2 = { nounwind readonly }
 attributes #3 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll b/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
index de3b1d5bf78b3..d6cf5b199c60a 100644
--- a/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/write-register-vgpr-into-sgpr.ll
@@ -6,10 +6,6 @@
 ; vgpr value into a scalar register, but I don't think there's much we
 ; can do to avoid this.
 
-declare void @llvm.write_register.i32(metadata, i32) #0
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare void @llvm.amdgcn.wave.barrier() #2
-
 define amdgpu_kernel void @write_vgpr_into_sgpr() {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   call void @llvm.write_register.i32(metadata !0, i32 %tid)
diff --git a/llvm/test/CodeGen/AMDGPU/write_register.ll b/llvm/test/CodeGen/AMDGPU/write_register.ll
index f6ac26e8ecfc6..ae8809e590c2d 100644
--- a/llvm/test/CodeGen/AMDGPU/write_register.ll
+++ b/llvm/test/CodeGen/AMDGPU/write_register.ll
@@ -1,9 +1,6 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %s | FileCheck %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=bonaire -enable-misched=0 -verify-machineinstrs < %s | FileCheck %s
 
-declare void @llvm.write_register.i32(metadata, i32) #0
-declare void @llvm.write_register.i64(metadata, i64) #0
-
 ; CHECK-LABEL: {{^}}test_write_m0:
 define amdgpu_kernel void @test_write_m0(i32 %val) #0 {
   call void @llvm.write_register.i32(metadata !0, i32 0)
@@ -89,8 +86,6 @@ define amdgpu_kernel void @test_write_exec_hi(i32 %val) #0 {
   ret void
 }
 
-declare void @llvm.amdgcn.wave.barrier() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { convergent nounwind }
 
diff --git a/llvm/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll b/llvm/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll
index bdfa89d9f3044..14a6115cc9e4a 100644
--- a/llvm/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll
+++ b/llvm/test/CodeGen/AMDGPU/wrong-transalu-pos-fix.ll
@@ -36,40 +36,28 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.y() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tgid.z() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.local.size.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.local.size.y() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.local.size.z() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.y() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.z() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.global.size.x() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.global.size.y() #1
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.global.size.z() #1
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
index 11f6a2960776b..51186a605ecd6 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
@@ -1391,18 +1391,4 @@ define amdgpu_gfx <32 x i32> @strict_wwm_callee_saves(<32 x i32> inreg %keep, pt
   ret <32 x i32> %keep
 }
 
-declare i32 @llvm.amdgcn.strict.wwm.i32(i32)
-declare i64 @llvm.amdgcn.strict.wwm.i64(i64)
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32)
-declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64)
-declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)
-declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32)
-declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
-declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)
-
 attributes #0 = { "amdgpu-waves-per-eu"="5,5" }
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
index e5cebc1c31832..fcbf17aa2ae09 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved.ll
@@ -1747,21 +1747,5 @@ define amdgpu_cs void @strict_wwm_amdgpu_cs_main(<4 x i32> inreg %desc, i32 %ind
   ret void
 }
 
-declare i32 @llvm.amdgcn.strict.wwm.i32(i32)
-declare i64 @llvm.amdgcn.strict.wwm.i64(i64)
-declare i32 @llvm.amdgcn.wwm.i32(i32)
-declare i64 @llvm.amdgcn.wwm.i64(i64)
-declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32)
-declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64)
-declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1)
-declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32)
-declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32)
-declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32)
-declare <4 x i32> @llvm.amdgcn.s.buffer.load.v4i32(<4 x i32>, i32, i32)
-
 !llvm.module.flags = !{!0}
 !0 = !{i32 1, !"amdhsa_code_object_version", i32 500}
diff --git a/llvm/test/CodeGen/AMDGPU/xnor.ll b/llvm/test/CodeGen/AMDGPU/xnor.ll
index 0099a370cec7e..358531ebefae4 100644
--- a/llvm/test/CodeGen/AMDGPU/xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/xnor.ll
@@ -193,4 +193,3 @@ entry:
 }
 
 ; Function Attrs: nounwind readnone
-declare i32 @llvm.amdgcn.workitem.id.x() #0
diff --git a/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll b/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
index a7650d1286ddc..e10e6b95b875a 100644
--- a/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
+++ b/llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll
@@ -25,5 +25,3 @@ main_body:
   %tmp43 = select i1 %tmp42, float 1.000000e+00, float 0.000000e+00
   ret float %tmp43
 }
-
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/zext-divergence-driven-isel.ll b/llvm/test/CodeGen/AMDGPU/zext-divergence-driven-isel.ll
index f9a7e887ada23..a43530f16e6ed 100644
--- a/llvm/test/CodeGen/AMDGPU/zext-divergence-driven-isel.ll
+++ b/llvm/test/CodeGen/AMDGPU/zext-divergence-driven-isel.ll
@@ -86,7 +86,5 @@ define amdgpu_kernel void @zext_i16_to_i64_divergent(ptr addrspace(1) %out, i16
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #1
-
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone speculatable }
diff --git a/llvm/test/CodeGen/AMDGPU/zext-lid.ll b/llvm/test/CodeGen/AMDGPU/zext-lid.ll
index 6fea05d8d7406..ecf35c0f6d215 100644
--- a/llvm/test/CodeGen/AMDGPU/zext-lid.ll
+++ b/llvm/test/CodeGen/AMDGPU/zext-lid.ll
@@ -91,12 +91,6 @@ entry:
   ret void
 }
 
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-declare i32 @llvm.amdgcn.workitem.id.y() #2
-
-declare i32 @llvm.amdgcn.workitem.id.z() #2
-
 attributes #0 = { nounwind "amdgpu-flat-work-group-size"="64,128" }
 attributes #1 = { nounwind "amdgpu-flat-work-group-size"="512,512" }
 attributes #2 = { nounwind readnone speculatable }