diff --git a/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll
index ae4e8ee2736aaf..5615a006292011 100644
--- a/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll
+++ b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll
@@ -40,7 +40,7 @@ define void @test1() {
   unreachable
 }
 
-define amdgpu_kernel void @test2(i32* %p, i32 %x) {
+define amdgpu_kernel void @test2(ptr %p, i32 %x) {
 ; GFX9-LABEL: test2:
 ; GFX9: ; %bb.0:
 ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c
@@ -99,7 +99,7 @@ then:
   unreachable
 
 else:
-  store i32 %x, i32* %p
+  store i32 %x, ptr %p
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/cube.ll b/llvm/test/CodeGen/AMDGPU/cube.ll
index 3fd4ee68b6df3a..51b7ca1965e4ed 100644
--- a/llvm/test/CodeGen/AMDGPU/cube.ll
+++ b/llvm/test/CodeGen/AMDGPU/cube.ll
@@ -12,7 +12,7 @@ declare float @llvm.amdgcn.cubema(float, float, float) #0
 ; GCN-DAG: v_cubetc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN-DAG: v_cubema_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GCN: _store_dwordx4
-define amdgpu_kernel void @cube(<4 x float> addrspace(1)* %out, float %a, float %b, float %c) #1 {
+define amdgpu_kernel void @cube(ptr addrspace(1) %out, float %a, float %b, float %c) #1 {
   %cubeid = call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
   %cubesc = call float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
   %cubetc = call float @llvm.amdgcn.cubetc(float %a, float %b, float %c)
@@ -22,7 +22,7 @@ define amdgpu_kernel void @cube(<4 x float> addrspace(1)* %out, float
   %vec1 = insertelement <4 x float> %vec0, float %cubesc, i32 1
   %vec2 = insertelement <4 x float> %vec1, float %cubetc, i32 2
   %vec3 = insertelement <4 x float> %vec2, float %cubema, i32 3
-  store <4 x float> %vec3, <4 x float> addrspace(1)* %out
+  store <4 x float> %vec3, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll
index 71a599a559b03d..a3fd636065cddd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.alignbyte.ll
@@ -4,9 +4,9 @@ declare i32 @llvm.amdgcn.alignbyte(i32, i32, i32) #0
 
 ; GCN-LABEL: {{^}}v_alignbyte_b32:
 ; GCN: v_alignbyte_b32 {{[vs][0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}
-define amdgpu_kernel void @v_alignbyte_b32(i32 addrspace(1)* %out, i32 %src1, i32 %src2, i32 %src3) #1 {
+define amdgpu_kernel void @v_alignbyte_b32(ptr addrspace(1) %out, i32 %src1, i32 %src2, i32 %src3) #1 {
   %val = call i32 @llvm.amdgcn.alignbyte(i32 %src1, i32 %src2, i32 %src3) #0
-  store i32 %val, i32 addrspace(1)* %out
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll
index 5e9b7116673228..e96a3545cbd77e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.csub.ll
@@ -2,7 +2,7 @@
 ; RUN: llc < %s -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs | FileCheck %s -check-prefix=GCN
 
 declare i32 @llvm.amdgcn.buffer.atomic.csub(i32, <4 x i32>, i32, i32, i1)
-declare i32 @llvm.amdgcn.global.atomic.csub(i32 addrspace(1)*, i32)
+declare i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1), i32)
 
 ; GCN-LABEL: {{^}}buffer_atomic_csub:
 ; GCN: buffer_atomic_csub v0, v1, s[0:3], 0 idxen glc
@@ -22,17 +22,17 @@ main_body:
 
 ; GCN-LABEL: {{^}}global_atomic_csub:
 ; GCN: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9:]+}}, s{{\[[0-9]+:[0-9]+\]}} glc
-define amdgpu_kernel void @global_atomic_csub(i32 addrspace(1)* %ptr, i32 %data) {
+define amdgpu_kernel void @global_atomic_csub(ptr addrspace(1) %ptr, i32 %data) {
 main_body:
-  %ret = call i32 @llvm.amdgcn.global.atomic.csub(i32 addrspace(1)* %ptr, i32 %data)
+  %ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %ptr, i32 %data)
   ret void
 }
 
 ; GCN-LABEL: {{^}}global_atomic_csub_off4:
 ; GCN: global_atomic_csub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4 glc
-define amdgpu_kernel void @global_atomic_csub_off4(i32 addrspace(1)* %ptr, i32 %data) {
+define amdgpu_kernel void @global_atomic_csub_off4(ptr addrspace(1) %ptr, i32 %data) {
 main_body:
-  %p = getelementptr i32, i32 addrspace(1)* %ptr, i64 1
-  %ret = call i32 @llvm.amdgcn.global.atomic.csub(i32 addrspace(1)* %p, i32 %data)
+  %p = getelementptr i32, ptr addrspace(1) %ptr, i64 1
+  %ret = call i32 @llvm.amdgcn.global.atomic.csub(ptr addrspace(1) %p, i32 %data)
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
index dc991aeef4f169..578ba584d8960c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.dec.ll
@@ -2,13 +2,13 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,CIVI %s
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
 
-declare i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
+declare i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #2
+declare i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #2
+declare i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr nocapture, i32, i32, i32, i1) #2
 
-declare i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) nocapture, i64, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr nocapture, i64, i32, i32, i1) #2
 
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 
@@ -18,9 +18,9 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
-define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
-  store i32 %result, i32 addrspace(1)* %out
+define amdgpu_kernel void @lds_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false)
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -30,10 +30,10 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 ad
 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; GCN: ds_dec_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
-define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
-  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
-  store i32 %result, i32 addrspace(1)* %out
+define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+  %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false)
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -45,8 +45,8 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out,
 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
 ; GCN: ds_dec_u32 [[VPTR]], [[DATA]]
-define amdgpu_kernel void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) nounwind {
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @lds_atomic_dec_noret_i32(ptr addrspace(3) %ptr) nounwind {
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -56,9 +56,9 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32(i32 addrspace(3)* %ptr) noun
 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; GCN: ds_dec_u32 v{{[0-9]+}}, [[K]] offset:16
-define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
-  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
+  %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -67,9 +67,9 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i32_offset(i32 addrspace(3)* %pt
 ; CIVI: buffer_atomic_dec [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
 ; GFX9-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
 ; GFX9: global_atomic_dec v{{[0-9]+}}, [[ZERO]], [[K]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
-define amdgpu_kernel void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
-  store i32 %result, i32 addrspace(1)* %out
+define amdgpu_kernel void @global_atomic_dec_ret_i32(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false)
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -79,10 +79,10 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32(i32 addrspace(1)* %out, i32
 ; GFX9-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
 ; GFX9: global_atomic_dec v{{[0-9]+}}, [[ZERO]], [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
-define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
-  store i32 %result, i32 addrspace(1)* %out
+define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false)
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -92,8 +92,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset(i32 addrspace(1)* %o
 ; GFX9-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
 ; GFX9: global_atomic_dec [[ZERO]], [[K]], s{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) nounwind {
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @global_atomic_dec_noret_i32(ptr addrspace(1) %ptr) nounwind {
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -103,9 +103,9 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32(i32 addrspace(1)* %ptr) n
 ; GFX9-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
 ; GFX9: global_atomic_dec [[ZERO]], [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
-define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(ptr addrspace(1) %ptr) nounwind {
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -113,13 +113,13 @@ define amdgpu_kernel void @global_atomic_dec_noret_i32_offset(i32 addrspace(1)*
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
 ; VI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
-define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
   %id = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
-  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
-  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
-  store i32 %result, i32 addrspace(1)* %out.gep
+  %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
+  %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id
+  %gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false)
+  store i32 %result, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -127,20 +127,20 @@ define amdgpu_kernel void @global_atomic_dec_ret_i32_offset_addr64(i32 addrspace
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; CI: buffer_atomic_dec [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
 ; VI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
-define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @global_atomic_dec_noret_i32_offset_addr64(ptr addrspace(1) %ptr) #0 {
   %id = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
-  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
+  %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
+  %gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false)
   ret void
 }
 
 ; GCN-LABEL: {{^}}flat_atomic_dec_ret_i32:
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; GCN: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 {
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
-  store i32 %result, i32* %out
+define amdgpu_kernel void @flat_atomic_dec_ret_i32(ptr %out, ptr %ptr) #0 {
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false)
+  store i32 %result, ptr %out
   ret void
 }
 
@@ -148,18 +148,18 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32(i32* %out, i32* %ptr) #0 {
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
 ; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(i32* %out, i32* %ptr) #0 {
-  %gep = getelementptr i32, i32* %ptr, i32 4
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
-  store i32 %result, i32* %out
+define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset(ptr %out, ptr %ptr) #0 {
+  %gep = getelementptr i32, ptr %ptr, i32 4
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
+  store i32 %result, ptr %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}flat_atomic_dec_noret_i32:
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; GCN: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32* %ptr) nounwind {
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_dec_noret_i32(ptr %ptr) nounwind {
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -167,9 +167,9 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32(i32* %ptr) nounwind {
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; CIVI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
 ; GFX9: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32* %ptr) nounwind {
-  %gep = getelementptr i32, i32* %ptr, i32 4
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(ptr %ptr) nounwind {
+  %gep = getelementptr i32, ptr %ptr, i32 4
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -177,13 +177,13 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset(i32* %ptr) nounwind
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; CIVI: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
 ; GFX9: flat_atomic_dec v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(ptr %out, ptr %ptr) #0 {
   %id = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.tid = getelementptr i32, i32* %ptr, i32 %id
-  %out.gep = getelementptr i32, i32* %out, i32 %id
-  %gep = getelementptr i32, i32* %gep.tid, i32 5
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
-  store i32 %result, i32* %out.gep
+  %gep.tid = getelementptr i32, ptr %ptr, i32 %id
+  %out.gep = getelementptr i32, ptr %out, i32 %id
+  %gep = getelementptr i32, ptr %gep.tid, i32 5
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
+  store i32 %result, ptr %out.gep
   ret void
 }
 
@@ -191,11 +191,11 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i32_offset_addr64(i32* %out, i32*
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; CIVI: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
 ; GFX9: flat_atomic_dec v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(ptr %ptr) #0 {
   %id = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.tid = getelementptr i32, i32* %ptr, i32 %id
-  %gep = getelementptr i32, i32* %gep.tid, i32 5
-  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false)
+  %gep.tid = getelementptr i32, ptr %ptr, i32 %id
+  %gep = getelementptr i32, ptr %gep.tid, i32 5
+  %result = call i32 @llvm.amdgcn.atomic.dec.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -203,9 +203,9 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i32_offset_addr64(i32* %ptr) #0
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 {
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
-  store i64 %result, i64* %out
+define amdgpu_kernel void @flat_atomic_dec_ret_i64(ptr %out, ptr %ptr) #0 {
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false)
+  store i64 %result, ptr %out
   ret void
 }
 
@@ -214,10 +214,10 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64(i64* %out, i64* %ptr) #0 {
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:32 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr) #0 {
-  %gep = getelementptr i64, i64* %ptr, i32 4
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
-  store i64 %result, i64* %out
+define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(ptr %out, ptr %ptr) #0 {
+  %gep = getelementptr i64, ptr %ptr, i32 4
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
+  store i64 %result, ptr %out
   ret void
 }
 
@@ -225,8 +225,8 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset(i64* %out, i64* %ptr)
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; GCN: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind {
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_dec_noret_i64(ptr %ptr) nounwind {
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -235,9 +235,9 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64(i64* %ptr) nounwind {
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}}
 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:32{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind {
-  %gep = getelementptr i64, i64* %ptr, i32 4
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(ptr %ptr) nounwind {
+  %gep = getelementptr i64, ptr %ptr, i32 4
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -246,13 +246,13 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset(i64* %ptr) nounwind
 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:40 glc{{$}}
-define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(ptr %out, ptr %ptr) #0 {
   %id = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.tid = getelementptr i64, i64* %ptr, i32 %id
-  %out.gep = getelementptr i64, i64* %out, i32 %id
-  %gep = getelementptr i64, i64* %gep.tid, i32 5
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
-  store i64 %result, i64* %out.gep
+  %gep.tid = getelementptr i64, ptr %ptr, i32 %id
+  %out.gep = getelementptr i64, ptr %out, i32 %id
+  %gep = getelementptr i64, ptr %gep.tid, i32 5
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
+  store i64 %result, ptr %out.gep
   ret void
 }
 
@@ -261,11 +261,11 @@ define amdgpu_kernel void @flat_atomic_dec_ret_i64_offset_addr64(i64* %out, i64*
 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; CIVI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}}
 ; GFX9: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:40{{$}}
-define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0 {
+define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(ptr %ptr) #0 {
   %id = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.tid = getelementptr i64, i64* %ptr, i32 %id
-  %gep = getelementptr i64, i64* %gep.tid, i32 5
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false)
+  %gep.tid = getelementptr i64, ptr %ptr, i32 %id
+  %gep = getelementptr i64, ptr %gep.tid, i32 5
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -277,13 +277,13 @@ define amdgpu_kernel void @flat_atomic_dec_noret_i64_offset_addr64(i64* %ptr) #0
 ; GCN-DAG: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
 ; GCN: ds_dec_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_dec_shl_base_lds_0(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
-  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
-  %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false)
-  store i32 %idx.0, i32 addrspace(1)* %add_use
-  store i32 %val0, i32 addrspace(1)* %out
+  %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
+  %val0 = call i32 @llvm.amdgcn.atomic.dec.i32.p3(ptr addrspace(3) %arrayidx0, i32 9, i32 0, i32 0, i1 false)
+  store i32 %idx.0, ptr addrspace(1) %add_use
+  store i32 %val0, ptr addrspace(1) %out
   ret void
 }
 
@@ -294,9 +294,9 @@ define amdgpu_kernel void @atomic_dec_shl_base_lds_0(i32 addrspace(1)* %out, i32
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v[[[KLO]]:[[KHI]]]{{$}}
-define amdgpu_kernel void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
-  store i64 %result, i64 addrspace(1)* %out
+define amdgpu_kernel void @lds_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 0, i32 0, i1 false)
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -307,10 +307,10 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 ad
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v[[[KLO]]:[[KHI]]] offset:32
-define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
-  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
-  store i64 %result, i64 addrspace(1)* %out
+define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false)
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -321,8 +321,8 @@ define amdgpu_kernel void @lds_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out,
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; GCN: ds_dec_u64 v{{[0-9]+}}, v[[[KLO]]:[[KHI]]]{{$}}
-define amdgpu_kernel void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) nounwind {
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @lds_atomic_dec_noret_i64(ptr addrspace(3) %ptr) nounwind {
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -333,9 +333,9 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64(i64 addrspace(3)* %ptr) noun
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; GCN: ds_dec_u64 v{{[0-9]+}}, v[[[KLO]]:[[KHI]]] offset:32{{$}}
-define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
-  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
+  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -346,9 +346,9 @@ define amdgpu_kernel void @lds_atomic_dec_noret_i64_offset(i64 addrspace(3)* %pt
 ; CIVI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
 ; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
-define amdgpu_kernel void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
-  store i64 %result, i64 addrspace(1)* %out
+define amdgpu_kernel void @global_atomic_dec_ret_i64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false)
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -358,10 +358,10 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64(i64 addrspace(1)* %out, i64
 ; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
 ; CIVI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
 ; GFX9: global_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
-define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
-  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
-  store i64 %result, i64 addrspace(1)* %out
+define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+  %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false)
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -371,8 +371,8 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset(i64 addrspace(1)* %o
 ; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
 ; CIVI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 ; GFX9: global_atomic_dec_x2 v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) nounwind {
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @global_atomic_dec_noret_i64(ptr addrspace(1) %ptr) nounwind {
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -382,9 +382,9 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64(i64 addrspace(1)* %ptr) n
 ; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
 ; CIVI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
 ; GFX9: global_atomic_dec_x2 v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
-define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
-  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(ptr addrspace(1) %ptr) nounwind {
+  %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -394,13 +394,13 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset(i64 addrspace(1)*
 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; CI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
 ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
-define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
   %id = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
-  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
-  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
-  store i64 %result, i64 addrspace(1)* %out.gep
+  %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
+  %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id
+  %gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false)
+  store i64 %result, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -410,11 +410,11 @@ define amdgpu_kernel void @global_atomic_dec_ret_i64_offset_addr64(i64 addrspace
 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; CI: buffer_atomic_dec_x2 v[[[KLO]]:[[KHI]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
 ; VI: flat_atomic_dec_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]]{{$}}
-define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(ptr addrspace(1) %ptr) #0 {
   %id = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
-  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
-  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
+  %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
+  %gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5
+  %result = call i64 @llvm.amdgcn.atomic.dec.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -426,13 +426,13 @@ define amdgpu_kernel void @global_atomic_dec_noret_i64_offset_addr64(i64 addrspa
 ; GCN-DAG: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}}
 ; GCN: ds_dec_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16
-define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_dec_shl_base_lds_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
-  %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0
-  %val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false)
-  store i32 %idx.0, i32 addrspace(1)* %add_use
-  store i64 %val0, i64 addrspace(1)* %out
+  %arrayidx0 = getelementptr inbounds [512 x i64], ptr addrspace(3) @lds1, i32 0, i32 %idx.0
+  %val0 = call i64 @llvm.amdgcn.atomic.dec.i64.p3(ptr addrspace(3) %arrayidx0, i64 9, i32 0, i32 0, i1 false)
+  store i32 %idx.0, ptr addrspace(1) %add_use
+  store i64 %val0, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll
index 84b675d8a78661..0bcc4db1480c39 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.gfx90a.ll
@@ -3,8 +3,8 @@
 declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1)
 declare <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i1)
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float)
-declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>)
+declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
+declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)
 
 ; GFX908: LLVM ERROR: Cannot select: {{.+}}: f32,ch = BUFFER_ATOMIC_FADD
 
@@ -42,52 +42,52 @@ main_body:
 
 ; GFX90A-LABEL: {{^}}global_atomic_add_f32:
 ; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off glc
-define amdgpu_ps float @global_atomic_add_f32(float addrspace(1)* %ptr, float %data) {
+define amdgpu_ps float @global_atomic_add_f32(ptr addrspace(1) %ptr, float %data) {
 main_body:
-  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data)
+  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
   ret float %ret
 }
 
 ; GFX90A-LABEL: {{^}}global_atomic_add_f32_off4:
 ; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off offset:4 glc
-define amdgpu_ps float @global_atomic_add_f32_off4(float addrspace(1)* %ptr, float %data) {
+define amdgpu_ps float @global_atomic_add_f32_off4(ptr addrspace(1) %ptr, float %data) {
 main_body:
-  %p = getelementptr float, float addrspace(1)* %ptr, i64 1
-  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %p, float %data)
+  %p = getelementptr float, ptr addrspace(1) %ptr, i64 1
+  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data)
   ret float %ret
 }
 
 ; GFX90A-LABEL: {{^}}global_atomic_add_f32_offneg4:
 ; GFX90A: global_atomic_add_f32 v0, v[0:1], v2, off offset:-4 glc
-define amdgpu_ps float @global_atomic_add_f32_offneg4(float addrspace(1)* %ptr, float %data) {
+define amdgpu_ps float @global_atomic_add_f32_offneg4(ptr addrspace(1) %ptr, float %data) {
 main_body:
-  %p = getelementptr float, float addrspace(1)* %ptr, i64 -1
-  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %p, float %data)
+  %p = getelementptr float, ptr addrspace(1) %ptr, i64 -1
+  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data)
   ret float %ret
 }
 
 ; GFX90A-LABEL: {{^}}global_atomic_pk_add_v2f16:
 ; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off glc
-define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
+define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16(ptr addrspace(1) %ptr, <2 x half> %data) {
 main_body:
-  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data)
+  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data)
   ret <2 x half> %ret
 }
 
 ; GFX90A-LABEL: {{^}}global_atomic_pk_add_v2f16_off4:
 ; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:4 glc
-define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16_off4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
+define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16_off4(ptr addrspace(1) %ptr, <2 x half> %data) {
 main_body:
-  %p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 1
-  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
+  %p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 1
+  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data)
   ret <2 x half> %ret
 }
 
 ; GFX90A-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4:
 ; GFX90A: global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:-4 glc
-define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16_offneg4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
+define amdgpu_ps <2 x half> @global_atomic_pk_add_v2f16_offneg4(ptr addrspace(1) %ptr, <2 x half> %data) {
 main_body:
-  %p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -1
-  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
+  %p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 -1
+  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data)
   ret <2 x half> %ret
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
index 635327b06e5529..43335add49f91e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.fadd.ll
@@ -3,9 +3,9 @@
 declare float @llvm.amdgcn.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i1)
 declare <2 x half> @llvm.amdgcn.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i1)
-declare float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)*, float)
-declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)*, <2 x half>)
-declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32(float*, float)
+declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1), float)
+declare <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1), <2 x half>)
+declare float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr, float)
 
 ; GCN-LABEL: {{^}}buffer_atomic_add_f32:
 ; GCN: buffer_atomic_add_f32 v0, v1, s[0:3], 0 idxen
@@ -41,53 +41,53 @@ main_body:
 
 ; GCN-LABEL: {{^}}global_atomic_add_f32:
 ; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @global_atomic_add_f32(float addrspace(1)* %ptr, float %data) {
+define amdgpu_kernel void @global_atomic_add_f32(ptr addrspace(1) %ptr, float %data) {
 main_body:
-  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data)
+  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
   ret void
 }
 
 ; GCN-LABEL: {{^}}global_atomic_add_f32_off4:
 ; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4
-define amdgpu_kernel void @global_atomic_add_f32_off4(float addrspace(1)* %ptr, float %data) {
+define amdgpu_kernel void @global_atomic_add_f32_off4(ptr addrspace(1) %ptr, float %data) {
 main_body:
-  %p = getelementptr float, float addrspace(1)* %ptr, i64 1
-  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %p, float %data)
+  %p = getelementptr float, ptr addrspace(1) %ptr, i64 1
+  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data)
   ret void
 }
 
 ; GCN-LABEL: {{^}}global_atomic_add_f32_offneg4:
 ; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4
-define amdgpu_kernel void @global_atomic_add_f32_offneg4(float addrspace(1)* %ptr, float %data) {
+define amdgpu_kernel void @global_atomic_add_f32_offneg4(ptr addrspace(1) %ptr, float %data) {
 main_body:
-  %p = getelementptr float, float addrspace(1)* %ptr, i64 -1
-  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %p, float %data)
+  %p = getelementptr float, ptr addrspace(1) %ptr, i64 -1
+  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %p, float %data)
   ret void
 }
 
 ; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16:
 ; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @global_atomic_pk_add_v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
+define amdgpu_kernel void @global_atomic_pk_add_v2f16(ptr addrspace(1) %ptr, <2 x half> %data) {
 main_body:
-  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %ptr, <2 x half> %data)
+  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %ptr, <2 x half> %data)
   ret void
 }
 
 ; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_off4:
 ; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:4
-define amdgpu_kernel void @global_atomic_pk_add_v2f16_off4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
+define amdgpu_kernel void @global_atomic_pk_add_v2f16_off4(ptr addrspace(1) %ptr, <2 x half> %data) {
 main_body:
-  %p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 1
-  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
+  %p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 1
+  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data)
   ret void
 }
 
 ; GCN-LABEL: {{^}}global_atomic_pk_add_v2f16_offneg4:
 ; GCN: global_atomic_pk_add_f16 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:-4{{$}}
-define amdgpu_kernel void @global_atomic_pk_add_v2f16_offneg4(<2 x half> addrspace(1)* %ptr, <2 x half> %data) {
+define amdgpu_kernel void @global_atomic_pk_add_v2f16_offneg4(ptr addrspace(1) %ptr, <2 x half> %data) {
 main_body:
-  %p = getelementptr <2 x half>, <2 x half> addrspace(1)* %ptr, i64 -1
-  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1v2f16.v2f16(<2 x half> addrspace(1)* %p, <2 x half> %data)
+  %p = getelementptr <2 x half>, ptr addrspace(1) %ptr, i64 -1
+  %ret = call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %p, <2 x half> %data)
   ret void
 }
 
@@ -95,15 +95,15 @@ main_body:
 ; the feature set.
 ; GCN-LABEL: {{^}}global_atomic_fadd_f32_wrong_subtarget:
 ; GCN: global_atomic_add_f32 v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(float addrspace(1)* %ptr, float %data) #0 {
-  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1f32.f32(float addrspace(1)* %ptr, float %data)
+define amdgpu_kernel void @global_atomic_fadd_f32_wrong_subtarget(ptr addrspace(1) %ptr, float %data) #0 {
+  %ret = call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %ptr, float %data)
   ret void
 }
 
 ; GCN-LABEL: {{^}}flat_atomic_fadd_f32_wrong_subtarget:
 ; GCN: flat_atomic_add_f32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
-define amdgpu_kernel void @flat_atomic_fadd_f32_wrong_subtarget(float* %ptr, float %data) #1 {
-  %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0f32.f32(float* %ptr, float %data)
+define amdgpu_kernel void @flat_atomic_fadd_f32_wrong_subtarget(ptr %ptr, float %data) #1 {
+  %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p0.f32(ptr %ptr, float %data)
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
index 23792c6df0bc82..fcfa6715cb6f43 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.atomic.inc.ll
@@ -2,13 +2,13 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI %s
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
 
-declare i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32, i32, i32, i1) #2
-declare i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* nocapture, i32, i32, i32, i1) #2
+declare i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) nocapture, i32, i32, i32, i1) #2
+declare i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) nocapture, i32, i32, i32, i1) #2
+declare i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr nocapture, i32, i32, i32, i1) #2
 
-declare i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* nocapture, i64, i32, i32, i1) #2
-declare i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* nocapture, i64, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) nocapture, i64, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) nocapture, i64, i32, i32, i1) #2
+declare i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr nocapture, i64, i32, i32, i1) #2
 
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 
@@ -18,9 +18,9 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]]
-define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
-  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
-  store i32 %result, i32 addrspace(1)* %out
+define amdgpu_kernel void @lds_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false)
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -30,10 +30,10 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 ad
 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] offset:16
-define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %ptr) #0 {
-  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
-  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
-  store i32 %result, i32 addrspace(1)* %out
+define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+  %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
+  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false)
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -45,8 +45,8 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out,
 ; GCN-DAG: v_mov_b32_e32 [[DATA:v[0-9]+]], 4
 ; GCN-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[SPTR]]
 ; GCN: ds_inc_u32 [[VPTR]], [[DATA]]
-define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) nounwind {
-  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @lds_atomic_inc_noret_i32(ptr addrspace(3) %ptr) nounwind {
+  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -56,9 +56,9 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i32(i32 addrspace(3)* %ptr) noun
 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; GCN: ds_inc_u32 v{{[0-9]+}}, [[K]] offset:16
-define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %ptr) nounwind {
-  %gep = getelementptr i32, i32 addrspace(3)* %ptr, i32 4
-  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %gep, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(ptr addrspace(3) %ptr) nounwind {
+  %gep = getelementptr i32, ptr addrspace(3) %ptr, i32 4
+  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %gep, i32 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -66,9 +66,9 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i32_offset(i32 addrspace(3)* %pt
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
 ; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
-define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
-  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
-  store i32 %result, i32 addrspace(1)* %out
+define amdgpu_kernel void @global_atomic_inc_ret_i32(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false)
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -76,10 +76,10 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32(i32 addrspace(1)* %out, i32
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 glc{{$}}
 ; GFX9: global_atomic_inc v{{[0-9]+}}, v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
-define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
-  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
-  store i32 %result, i32 addrspace(1)* %out
+define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
+  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false)
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -87,8 +87,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset(i32 addrspace(1)* %o
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 ; GFX9: global_atomic_inc v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) nounwind {
-  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %ptr, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @global_atomic_inc_noret_i32(ptr addrspace(1) %ptr) nounwind {
+  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %ptr, i32 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -96,9 +96,9 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32(i32 addrspace(1)* %ptr) n
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; CIVI: buffer_atomic_inc [[K]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16{{$}}
 ; GFX9: global_atomic_inc v{{[0-9]+}}, [[K]], s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
-define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)* %ptr) nounwind {
-  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i32 4
-  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(ptr addrspace(1) %ptr) nounwind {
+  %gep = getelementptr i32, ptr addrspace(1) %ptr, i32 4
+  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -106,13 +106,13 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset(i32 addrspace(1)*
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20 glc{{$}}
 ; VI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}}
-define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
   %id = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
-  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id
-  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
-  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
-  store i32 %result, i32 addrspace(1)* %out.gep
+  %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
+  %out.gep = getelementptr i32, ptr addrspace(1) %out, i32 %id
+  %gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
+  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false)
+  store i32 %result, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -120,11 +120,11 @@ define amdgpu_kernel void @global_atomic_inc_ret_i32_offset_addr64(i32 addrspace
 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42
 ; CI: buffer_atomic_inc [[K]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:20{{$}}
 ; VI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}}
-define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(ptr addrspace(1) %ptr) #0 {
   %id = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.tid = getelementptr i32, i32 addrspace(1)* %ptr, i32 %id
-  %gep = getelementptr i32, i32 addrspace(1)* %gep.tid, i32 5
-  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %gep, i32 42, i32 0, i32 0, i1 false)
+  %gep.tid = getelementptr i32, ptr addrspace(1) %ptr, i32 %id
+  %gep = getelementptr i32, ptr addrspace(1) %gep.tid, i32 5
+  %result = call i32 @llvm.amdgcn.atomic.inc.i32.p1(ptr addrspace(1) %gep, i32 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -133,13 +133,13 @@ define amdgpu_kernel void @global_atomic_inc_noret_i32_offset_addr64(i32 addrspa
 ; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i32:
 ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
 ; GCN: ds_inc_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
-define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
+define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 {
   %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
   %idx.0 = add nsw i32 %tid.x, 2
-  %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds0, i32 0, i32 %idx.0
-  %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %arrayidx0, i32 9, i32 0, i32 0, i1 false)
-  store i32 %idx.0, i32 addrspace(1)* %add_use
-  store i32 %val0, i32 addrspace(1)* %out
+  %arrayidx0 = getelementptr inbounds [512 x i32], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
+  %val0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %arrayidx0, i32 9, i32 0, i32 0, i1 false)
+  store i32 %idx.0, ptr addrspace(1) %add_use
+  store i32 %val0, ptr addrspace(1) %out
   ret void
 }
 
@@ -147,9 +147,9 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i32(i32 addrspace(1)* %out,
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v[[[KLO]]:[[KHI]]]{{$}}
-define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
-  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
-  store i64 %result, i64 addrspace(1)* %out
+define amdgpu_kernel void @lds_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 0, i32 0, i1 false)
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -157,10 +157,10 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 ad
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v[[[KLO]]:[[KHI]]] offset:32
-define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(3)* %ptr) #0 {
-  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
-  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
-  store i64 %result, i64 addrspace(1)* %out
+define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(3) %ptr) #0 {
+  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
+  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false)
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -168,8 +168,8 @@ define amdgpu_kernel void @lds_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out,
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; GCN: ds_inc_u64 v{{[0-9]+}}, v[[[KLO]]:[[KHI]]]{{$}}
-define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) nounwind {
-  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %ptr, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @lds_atomic_inc_noret_i64(ptr addrspace(3) %ptr) nounwind {
+  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %ptr, i64 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -177,9 +177,9 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i64(i64 addrspace(3)* %ptr) noun
 ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42
 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; GCN: ds_inc_u64 v{{[0-9]+}}, v[[[KLO]]:[[KHI]]] offset:32{{$}}
-define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %ptr) nounwind {
-  %gep = getelementptr i64, i64 addrspace(3)* %ptr, i32 4
-  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %gep, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(ptr addrspace(3) %ptr) nounwind {
+  %gep = getelementptr i64, ptr addrspace(3) %ptr, i32 4
+  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %gep, i64 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -189,9 +189,9 @@ define amdgpu_kernel void @lds_atomic_inc_noret_i64_offset(i64 addrspace(3)* %pt
 ; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
 ; CIVI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 glc{{$}}
 ; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
-define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
-  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
-  store i64 %result, i64 addrspace(1)* %out
+define amdgpu_kernel void @global_atomic_inc_ret_i64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false)
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -201,10 +201,10 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64(i64 addrspace(1)* %out, i64
 ; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
 ; CIVI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32 glc{{$}}
 ; GFX9: global_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} offset:32 glc{{$}}
-define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
-  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
-  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
-  store i64 %result, i64 addrspace(1)* %out
+define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+  %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
+  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false)
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -215,8 +215,8 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset(i64 addrspace(1)* %o
 ; CIVI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
 ; GFX9: global_atomic_inc_x2 v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) nounwind {
-  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %ptr, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @global_atomic_inc_noret_i64(ptr addrspace(1) %ptr) nounwind {
+  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %ptr, i64 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -226,9 +226,9 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64(i64 addrspace(1)* %ptr) n
 ; GFX9: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0{{$}}
 ; CIVI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:32{{$}}
 ; GFX9: global_atomic_inc_x2 v[[ZERO]], v[[[KLO]]:[[KHI]]], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
-define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)* %ptr) nounwind {
-  %gep = getelementptr i64, i64 addrspace(1)* %ptr, i32 4
-  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
+define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(ptr addrspace(1) %ptr) nounwind {
+  %gep = getelementptr i64, ptr addrspace(1) %ptr, i32 4
+  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false)
   ret void
 }
 
@@ -238,13 +238,13 @@ define amdgpu_kernel void @global_atomic_inc_noret_i64_offset(i64 addrspace(1)*
 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; CI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40 glc{{$}}
 ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}}
-define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace(1)* %out, i64 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
   %id = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
-  %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %id
-  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
-  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false)
-  store i64 %result, i64 addrspace(1)* %out.gep
+  %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id
+  %out.gep = getelementptr i64, ptr addrspace(1) %out, i32 %id
+  %gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5
+  %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false)
+  store i64 %result, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -254,20 +254,20 @@ define amdgpu_kernel void @global_atomic_inc_ret_i64_offset_addr64(i64 addrspace
 ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}}
 ; CI: buffer_atomic_inc_x2 v[[[KLO]]:[[KHI]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:40{{$}}
 ; VI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]]{{$}}
-define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(i64 addrspace(1)* %ptr) #0 {
+define amdgpu_kernel void @global_atomic_inc_noret_i64_offset_addr64(ptr addrspace(1) %ptr) #0 {
   %id = call i32 @llvm.amdgcn.workitem.id.x()
-  %gep.tid = getelementptr i64, i64 addrspace(1)* %ptr, i32 %id
-  %gep = getelementptr i64, i64 addrspace(1)* %gep.tid, i32 5
-  %result = call i64
@llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %gep, i64 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i64, ptr addrspace(1) %ptr, i32 %id + %gep = getelementptr i64, ptr addrspace(1) %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p1(ptr addrspace(1) %gep, i64 42, i32 0, i32 0, i1 false) ret void } ; GCN-LABEL: {{^}}flat_atomic_inc_ret_i32: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}} -define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 { - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32* %out +define amdgpu_kernel void @flat_atomic_inc_ret_i32(ptr %out, ptr %ptr) #0 { + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr %out ret void } @@ -275,18 +275,18 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32(i32* %out, i32* %ptr) #0 { ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}} ; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16 glc{{$}} -define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(i32* %out, i32* %ptr) #0 { - %gep = getelementptr i32, i32* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32* %out +define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset(ptr %out, ptr %ptr) #0 { + %gep = getelementptr i32, ptr %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr %out ret void } ; GCN-LABEL: {{^}}flat_atomic_inc_noret_i32: ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}} -define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind { - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %ptr, i32 42, i32 0, i32 0, i1 false) +define amdgpu_kernel void @flat_atomic_inc_noret_i32(ptr %ptr) nounwind { + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %ptr, i32 42, i32 0, i32 0, i1 false) ret void } @@ -294,9 +294,9 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32(i32* %ptr) nounwind { ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}} ; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:16{{$}} -define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind { - %gep = getelementptr i32, i32* %ptr, i32 4 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) +define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(ptr %ptr) nounwind { + %gep = getelementptr i32, ptr %ptr, i32 4 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) ret void } @@ -304,13 +304,13 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset(i32* %ptr) nounwind ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; CIVI: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] glc{{$}} ; GFX9: flat_atomic_inc v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20 glc{{$}} -define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(ptr %out, ptr %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32* %ptr, i32 %id - %out.gep = 
getelementptr i32, i32* %out, i32 %id - %gep = getelementptr i32, i32* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) - store i32 %result, i32* %out.gep + %gep.tid = getelementptr i32, ptr %ptr, i32 %id + %out.gep = getelementptr i32, ptr %out, i32 %id + %gep = getelementptr i32, ptr %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) + store i32 %result, ptr %out.gep ret void } @@ -318,11 +318,11 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i32_offset_addr64(i32* %out, i32* ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; CIVI: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]]{{$}} ; GFX9: flat_atomic_inc v{{\[[0-9]+:[0-9]+\]}}, [[K]] offset:20{{$}} -define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(ptr %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i32, i32* %ptr, i32 %id - %gep = getelementptr i32, i32* %gep.tid, i32 5 - %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0i32(i32* %gep, i32 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i32, ptr %ptr, i32 %id + %gep = getelementptr i32, ptr %gep.tid, i32 5 + %result = call i32 @llvm.amdgcn.atomic.inc.i32.p0(ptr %gep, i32 42, i32 0, i32 0, i1 false) ret void } @@ -331,13 +331,13 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i32_offset_addr64(i32* %ptr) #0 ; GCN-LABEL: {{^}}atomic_inc_shl_base_lds_0_i64: ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 3, {{v[0-9]+}} ; GCN: ds_inc_rtn_u64 v{{\[[0-9]+:[0-9]+\]}}, [[PTR]], v{{\[[0-9]+:[0-9]+\]}} offset:16 -define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 { +define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(ptr addrspace(1) %out, ptr addrspace(1) %add_use) #0 { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 2 - %arrayidx0 = getelementptr inbounds [512 x i64], [512 x i64] addrspace(3)* @lds1, i32 0, i32 %idx.0 - %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %arrayidx0, i64 9, i32 0, i32 0, i1 false) - store i32 %idx.0, i32 addrspace(1)* %add_use - store i64 %val0, i64 addrspace(1)* %out + %arrayidx0 = getelementptr inbounds [512 x i64], ptr addrspace(3) @lds1, i32 0, i32 %idx.0 + %val0 = call i64 @llvm.amdgcn.atomic.inc.i64.p3(ptr addrspace(3) %arrayidx0, i64 9, i32 0, i32 0, i1 false) + store i32 %idx.0, ptr addrspace(1) %add_use + store i64 %val0, ptr addrspace(1) %out ret void } @@ -345,9 +345,9 @@ define amdgpu_kernel void @atomic_inc_shl_base_lds_0_i64(i64 addrspace(1)* %out, ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}} -define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 { - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64* %out +define amdgpu_kernel void @flat_atomic_inc_ret_i64(ptr %out, ptr %ptr) #0 { + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr %out ret void } @@ -356,10 +356,10 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64(i64* %out, i64* %ptr) #0 { ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, 
v[[[KLO]]:[[KHI]]] glc{{$}} ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:32 glc{{$}} -define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) #0 { - %gep = getelementptr i64, i64* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64* %out +define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(ptr %out, ptr %ptr) #0 { + %gep = getelementptr i64, ptr %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr %out ret void } @@ -367,8 +367,8 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset(i64* %out, i64* %ptr) ; GCN-DAG: v_mov_b32_e32 v[[KLO:[0-9]+]], 42 ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; GCN: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}} -define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind { - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %ptr, i64 42, i32 0, i32 0, i1 false) +define amdgpu_kernel void @flat_atomic_inc_noret_i64(ptr %ptr) nounwind { + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %ptr, i64 42, i32 0, i32 0, i1 false) ret void } @@ -377,9 +377,9 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64(i64* %ptr) nounwind { ; GCN-DAG: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}} ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:32{{$}} -define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind { - %gep = getelementptr i64, i64* %ptr, i32 4 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) +define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(ptr %ptr) nounwind { + %gep = getelementptr i64, ptr %ptr, i32 4 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) ret void } @@ -388,13 +388,13 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset(i64* %ptr) nounwind ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] glc{{$}} ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:40 glc{{$}} -define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(ptr %out, ptr %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64* %ptr, i32 %id - %out.gep = getelementptr i64, i64* %out, i32 %id - %gep = getelementptr i64, i64* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) - store i64 %result, i64* %out.gep + %gep.tid = getelementptr i64, ptr %ptr, i32 %id + %out.gep = getelementptr i64, ptr %out, i32 %id + %gep = getelementptr i64, ptr %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) + store i64 %result, ptr %out.gep ret void } @@ -403,11 +403,11 @@ define amdgpu_kernel void @flat_atomic_inc_ret_i64_offset_addr64(i64* %out, i64* ; GCN: v_mov_b32_e32 v[[KHI:[0-9]+]], 0{{$}} ; CIVI: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]{{\]$}} ; GFX9: flat_atomic_inc_x2 v{{\[[0-9]+:[0-9]+\]}}, v[[[KLO]]:[[KHI]]] offset:40{{$}} -define amdgpu_kernel void 
@flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 { +define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(ptr %ptr) #0 { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep.tid = getelementptr i64, i64* %ptr, i32 %id - %gep = getelementptr i64, i64* %gep.tid, i32 5 - %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0i64(i64* %gep, i64 42, i32 0, i32 0, i1 false) + %gep.tid = getelementptr i64, ptr %ptr, i32 %id + %gep = getelementptr i64, ptr %gep.tid, i32 5 + %result = call i64 @llvm.amdgcn.atomic.inc.i64.p0(ptr %gep, i64 42, i32 0, i32 0, i1 false) ret void } @@ -415,12 +415,12 @@ define amdgpu_kernel void @flat_atomic_inc_noret_i64_offset_addr64(i64* %ptr) #0 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 42 ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] ; GCN: ds_inc_rtn_u32 v{{[0-9]+}}, v{{[0-9]+}}, [[K]] -define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(3)* %ptr) #0 { - %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) - %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %ptr, i32 42, i32 0, i32 0, i1 false) +define amdgpu_kernel void @nocse_lds_atomic_inc_ret_i32(ptr addrspace(1) %out0, ptr addrspace(1) %out1, ptr addrspace(3) %ptr) #0 { + %result0 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false) + %result1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3(ptr addrspace(3) %ptr, i32 42, i32 0, i32 0, i1 false) - store i32 %result0, i32 addrspace(1)* %out0 - store i32 %result1, i32 addrspace(1)* %out1 + store i32 %result0, ptr addrspace(1) %out0 + store i32 %result1, ptr addrspace(1) %out1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll index dcf06ae6a723f3..ec68daaa476b12 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll @@ -117,12 +117,12 @@ main_body: ; CHECK-LABEL: buffer_load_mmo: ; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 ; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4 -define amdgpu_ps float @buffer_load_mmo(<4 x i32> inreg %rsrc, float addrspace(3)* %lds) { +define amdgpu_ps float @buffer_load_mmo(<4 x i32> inreg %rsrc, ptr addrspace(3) %lds) { entry: - store float 0.0, float addrspace(3)* %lds + store float 0.0, ptr addrspace(3) %lds %val = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0) - %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4 - store float 0.0, float addrspace(3)* %tmp2 + %tmp2 = getelementptr float, ptr addrspace(3) %lds, i32 4 + store float 0.0, ptr addrspace(3) %tmp2 ret float %val } @@ -448,7 +448,7 @@ main_body: ; CHECK-NEXT: buffer_load_dword v0, [[FI]], s{{\[[0-9]+:[0-9]+\]}}, 0 idxen define amdgpu_ps float @no_fold_fi_imm_soffset(<4 x i32> inreg %rsrc) { %alloca = alloca i32, addrspace(5) - %alloca.cast = ptrtoint i32 addrspace(5)* %alloca to i32 + %alloca.cast = ptrtoint ptr addrspace(5) %alloca to i32 %ret.val = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %alloca.cast, i32 0, i1 false, i1 false) ret float %ret.val @@ -460,7 +460,7 @@ define amdgpu_ps float @no_fold_fi_imm_soffset(<4 x i32> inreg %rsrc) { ; CHECK: buffer_load_dword v0, v[[[FI]]:[[HI]] define amdgpu_ps float @no_fold_fi_reg_soffset(<4 x i32> inreg %rsrc, i32 inreg %soffset) { %alloca = alloca i32, addrspace(5) - %alloca.cast = ptrtoint i32 addrspace(5)* %alloca 
to i32 + %alloca.cast = ptrtoint ptr addrspace(5) %alloca to i32 %ret.val = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 %alloca.cast, i32 %soffset, i1 false, i1 false) ret float %ret.val diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll index 05b125fa14f927..4e00a700309735 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.wbinvl1.vol.ll @@ -9,10 +9,10 @@ declare void @llvm.amdgcn.buffer.wbinvl1.vol() #0 ; VI: buffer_wbinvl1_vol ; encoding: [0x00,0x00,0xfc,0xe0,0x00,0x00,0x00,0x00] ; GCN: _store_byte ; GCN-NEXT: s_endpgm -define amdgpu_kernel void @test_buffer_wbinvl1_vol(i8 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @test_buffer_wbinvl1_vol(ptr addrspace(1) %ptr) #0 { call void @llvm.amdgcn.buffer.wbinvl1.vol() ; This used to crash in hazard recognizer - store i8 0, i8 addrspace(1)* %ptr, align 1 + store i8 0, ptr addrspace(1) %ptr, align 1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll index b0f8754b938c99..89dbe9b0e17ca9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.f16.ll @@ -11,15 +11,15 @@ declare i1 @llvm.amdgcn.class.f16(half %a, i32 %b) ; GCN: buffer_store_dword v[[R_I32]] ; GCN: s_endpgm define amdgpu_kernel void @class_f16( - i32 addrspace(1)* %r, - half addrspace(1)* %a, - i32 addrspace(1)* %b) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) { entry: - %a.val = load half, half addrspace(1)* %a - %b.val = load i32, i32 addrspace(1)* %b + %a.val = load half, ptr addrspace(1) %a + %b.val = load i32, ptr addrspace(1) %b %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 %b.val) %r.val.sext = sext i1 %r.val to i32 - store i32 %r.val.sext, i32 addrspace(1)* %r + store i32 %r.val.sext, ptr addrspace(1) %r ret void } @@ -32,7 +32,7 @@ entry: ; GCN: buffer_store_dword v[[VR_I32]] ; GCN: s_endpgm define amdgpu_kernel void @class_f16_fabs( - i32 addrspace(1)* %r, + ptr addrspace(1) %r, [8 x i32], half %a.val, [8 x i32], @@ -41,7 +41,7 @@ entry: %a.val.fabs = call half @llvm.fabs.f16(half %a.val) %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fabs, i32 %b.val) %r.val.sext = sext i1 %r.val to i32 - store i32 %r.val.sext, i32 addrspace(1)* %r + store i32 %r.val.sext, ptr addrspace(1) %r ret void } @@ -54,7 +54,7 @@ entry: ; GCN: buffer_store_dword v[[VR_I32]] ; GCN: s_endpgm define amdgpu_kernel void @class_f16_fneg( - i32 addrspace(1)* %r, + ptr addrspace(1) %r, [8 x i32], half %a.val, [8 x i32], @@ -63,7 +63,7 @@ entry: %a.val.fneg = fsub half -0.0, %a.val %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fneg, i32 %b.val) %r.val.sext = sext i1 %r.val to i32 - store i32 %r.val.sext, i32 addrspace(1)* %r + store i32 %r.val.sext, ptr addrspace(1) %r ret void } @@ -76,7 +76,7 @@ entry: ; GCN: buffer_store_dword v[[VR_I32]] ; GCN: s_endpgm define amdgpu_kernel void @class_f16_fabs_fneg( - i32 addrspace(1)* %r, + ptr addrspace(1) %r, [8 x i32], half %a.val, [8 x i32], @@ -86,7 +86,7 @@ entry: %a.val.fabs.fneg = fsub half -0.0, %a.val.fabs %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val.fabs.fneg, i32 %b.val) %r.val.sext = sext i1 %r.val to i32 - store i32 %r.val.sext, i32 addrspace(1)* %r + store i32 %r.val.sext, ptr addrspace(1) %r ret void } @@ -97,12 +97,12 @@ entry: ; GCN: buffer_store_dword v[[VR_I32]] ; GCN: s_endpgm define 
amdgpu_kernel void @class_f16_1( - i32 addrspace(1)* %r, + ptr addrspace(1) %r, half %a.val) { entry: %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1) %r.val.sext = sext i1 %r.val to i32 - store i32 %r.val.sext, i32 addrspace(1)* %r + store i32 %r.val.sext, ptr addrspace(1) %r ret void } @@ -113,12 +113,12 @@ entry: ; GCN: buffer_store_dword v[[VR_I32]] ; GCN: s_endpgm define amdgpu_kernel void @class_f16_64( - i32 addrspace(1)* %r, + ptr addrspace(1) %r, half %a.val) { entry: %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 64) %r.val.sext = sext i1 %r.val to i32 - store i32 %r.val.sext, i32 addrspace(1)* %r + store i32 %r.val.sext, ptr addrspace(1) %r ret void } @@ -130,12 +130,12 @@ entry: ; GCN: buffer_store_dword v[[VR_I32]] ; GCN: s_endpgm define amdgpu_kernel void @class_f16_full_mask( - i32 addrspace(1)* %r, + ptr addrspace(1) %r, half %a.val) { entry: %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 1023) %r.val.sext = sext i1 %r.val to i32 - store i32 %r.val.sext, i32 addrspace(1)* %r + store i32 %r.val.sext, ptr addrspace(1) %r ret void } @@ -147,11 +147,11 @@ entry: ; GCN: buffer_store_dword v[[VR_I32]] ; GCN: s_endpgm define amdgpu_kernel void @class_f16_nine_bit_mask( - i32 addrspace(1)* %r, + ptr addrspace(1) %r, half %a.val) { entry: %r.val = call i1 @llvm.amdgcn.class.f16(half %a.val, i32 511) %r.val.sext = sext i1 %r.val to i32 - store i32 %r.val.sext, i32 addrspace(1)* %r + store i32 %r.val.sext, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll index 311d74b13003cb..98b11985e35a1f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.class.ll @@ -14,10 +14,10 @@ declare double @llvm.fabs.f64(double) #1 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc ; SI-NEXT: buffer_store_dword [[RESULT]] ; SI: s_endpgm -define amdgpu_kernel void @test_class_f32(i32 addrspace(1)* %out, [8 x i32], float %a, [8 x i32], i32 %b) #0 { +define amdgpu_kernel void @test_class_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], i32 %b) #0 { %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 %b) #1 %sext = sext i1 %result to i32 - store i32 %sext, i32 addrspace(1)* %out, align 4 + store i32 %sext, ptr addrspace(1) %out, align 4 ret void } @@ -29,11 +29,11 @@ define amdgpu_kernel void @test_class_f32(i32 addrspace(1)* %out, [8 x i32], flo ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] ; SI-NEXT: buffer_store_dword [[RESULT]] ; SI: s_endpgm -define amdgpu_kernel void @test_class_fabs_f32(i32 addrspace(1)* %out, [8 x i32], float %a, [8 x i32], i32 %b) #0 { +define amdgpu_kernel void @test_class_fabs_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], i32 %b) #0 { %a.fabs = call float @llvm.fabs.f32(float %a) #1 %result = call i1 @llvm.amdgcn.class.f32(float %a.fabs, i32 %b) #1 %sext = sext i1 %result to i32 - store i32 %sext, i32 addrspace(1)* %out, align 4 + store i32 %sext, ptr addrspace(1) %out, align 4 ret void } @@ -45,11 +45,11 @@ define amdgpu_kernel void @test_class_fabs_f32(i32 addrspace(1)* %out, [8 x i32] ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]] ; SI-NEXT: buffer_store_dword [[RESULT]] ; SI: s_endpgm -define amdgpu_kernel void @test_class_fneg_f32(i32 addrspace(1)* %out, [8 x i32], float %a, [8 x i32], i32 %b) #0 { +define amdgpu_kernel void @test_class_fneg_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], i32 %b) #0 { %a.fneg = fsub float -0.0, %a 
   %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg, i32 %b) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -61,12 +61,12 @@ define amdgpu_kernel void @test_class_fneg_f32(i32 addrspace(1)* %out, [8 x i32]
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_fneg_fabs_f32(i32 addrspace(1)* %out, [8 x i32], float %a, [8 x i32], i32 %b) #0 {
+define amdgpu_kernel void @test_class_fneg_fabs_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], i32 %b) #0 {
   %a.fabs = call float @llvm.fabs.f32(float %a) #1
   %a.fneg.fabs = fsub float -0.0, %a.fabs
   %result = call i1 @llvm.amdgcn.class.f32(float %a.fneg.fabs, i32 %b) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -76,10 +76,10 @@ define amdgpu_kernel void @test_class_fneg_fabs_f32(i32 addrspace(1)* %out, [8 x
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]]
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_1_f32(i32 addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @test_class_1_f32(ptr addrspace(1) %out, float %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -89,10 +89,10 @@ define amdgpu_kernel void @test_class_1_f32(i32 addrspace(1)* %out, float %a) #0
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[COND]]
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_64_f32(i32 addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @test_class_64_f32(ptr addrspace(1) %out, float %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 64) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -104,10 +104,10 @@ define amdgpu_kernel void @test_class_64_f32(i32 addrspace(1)* %out, float %a) #
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_full_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @test_class_full_mask_f32(ptr addrspace(1) %out, float %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 1023) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -118,10 +118,10 @@ define amdgpu_kernel void @test_class_full_mask_f32(i32 addrspace(1)* %out, floa
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @test_class_9bit_mask_f32(ptr addrspace(1) %out, float %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -132,15 +132,15 @@ define amdgpu_kernel void @test_class_9bit_mask_f32(i32 addrspace(1)* %out, floa
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, s[{{[0-9]}}:{{[0-9]}}]
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_test_class_full_mask_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep.in
+  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep.in
 
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 511) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+  store i32 %sext, ptr addrspace(1) %gep.out, align 4
   ret void
 }
 
@@ -150,15 +150,15 @@ define amdgpu_kernel void @v_test_class_full_mask_f32(i32 addrspace(1)* %out, fl
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_inline_imm_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_class_inline_imm_constant_dynamic_mask_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
-  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %b = load i32, i32 addrspace(1)* %gep.in
+  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
+  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+  %b = load i32, ptr addrspace(1) %gep.in
 
   %result = call i1 @llvm.amdgcn.class.f32(float 1.0, i32 %b) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+  store i32 %sext, ptr addrspace(1) %gep.out, align 4
   ret void
 }
 
@@ -170,15 +170,15 @@ define amdgpu_kernel void @test_class_inline_imm_constant_dynamic_mask_f32(i32 a
 ; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_lit_constant_dynamic_mask_f32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_class_lit_constant_dynamic_mask_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
-  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %b = load i32, i32 addrspace(1)* %gep.in
+  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
+  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+  %b = load i32, ptr addrspace(1) %gep.in
 
   %result = call i1 @llvm.amdgcn.class.f32(float 1024.0, i32 %b) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+  store i32 %sext, ptr addrspace(1) %gep.out, align 4
   ret void
 }
 
@@ -190,10 +190,10 @@ define amdgpu_kernel void @test_class_lit_constant_dynamic_mask_f32(i32 addrspac
 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_f64(i32 addrspace(1)* %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 {
+define amdgpu_kernel void @test_class_f64(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 {
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 %b) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -205,11 +205,11 @@ define amdgpu_kernel void @test_class_f64(i32 addrspace(1)* %out, [8 x i32], dou
 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_fabs_f64(i32 addrspace(1)* %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 {
+define amdgpu_kernel void @test_class_fabs_f64(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 {
   %a.fabs = call double @llvm.fabs.f64(double %a) #1
   %result = call i1 @llvm.amdgcn.class.f64(double %a.fabs, i32 %b) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -221,11 +221,11 @@ define amdgpu_kernel void @test_class_fabs_f64(i32 addrspace(1)* %out, [8 x i32]
 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_fneg_f64(i32 addrspace(1)* %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 {
+define amdgpu_kernel void @test_class_fneg_f64(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 {
   %a.fneg = fsub double -0.0, %a
   %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg, i32 %b) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -237,32 +237,32 @@ define amdgpu_kernel void @test_class_fneg_f64(i32 addrspace(1)* %out, [8 x i32]
 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP]]
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_fneg_fabs_f64(i32 addrspace(1)* %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 {
+define amdgpu_kernel void @test_class_fneg_fabs_f64(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], i32 %b) #0 {
   %a.fabs = call double @llvm.fabs.f64(double %a) #1
   %a.fneg.fabs = fsub double -0.0, %a.fabs
   %result = call i1 @llvm.amdgcn.class.f64(double %a.fneg.fabs, i32 %b) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; SI-LABEL: {{^}}test_class_1_f64:
 ; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 1{{$}}
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_1_f64(i32 addrspace(1)* %out, double %a) #0 {
+define amdgpu_kernel void @test_class_1_f64(ptr addrspace(1) %out, double %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 1) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; SI-LABEL: {{^}}test_class_64_f64:
 ; SI: v_cmp_class_f64_e64 {{s\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 64{{$}}
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 {
+define amdgpu_kernel void @test_class_64_f64(ptr addrspace(1) %out, double %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 64) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -275,10 +275,10 @@ define amdgpu_kernel void @test_class_64_f64(i32 addrspace(1)* %out, double %a)
 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
 ; SI-NEXT: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_full_mask_f64(i32 addrspace(1)* %out, [8 x i32], double %a) #0 {
+define amdgpu_kernel void @test_class_full_mask_f64(ptr addrspace(1) %out, [8 x i32], double %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -290,15 +290,15 @@ define amdgpu_kernel void @test_class_full_mask_f64(i32 addrspace(1)* %out, [8 x
 ; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, s[{{[0-9]}}:{{[0-9]}}]
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 {
+define amdgpu_kernel void @v_test_class_full_mask_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.in = getelementptr double, double addrspace(1)* %in, i32 %tid
-  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load double, double addrspace(1)* %in
+  %gep.in = getelementptr double, ptr addrspace(1) %in, i32 %tid
+  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+  %a = load double, ptr addrspace(1) %in
 
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 511) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+  store i32 %sext, ptr addrspace(1) %gep.out, align 4
   ret void
 }
 
@@ -306,30 +306,30 @@ define amdgpu_kernel void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, do
 ; XSI: v_cmp_class_f64_e32 vcc, 1.0,
 ; SI: v_cmp_class_f64_e32 vcc,
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_inline_imm_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_class_inline_imm_constant_dynamic_mask_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
-  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %b = load i32, i32 addrspace(1)* %gep.in
+  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
+  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+  %b = load i32, ptr addrspace(1) %gep.in
 
   %result = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 %b) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+  store i32 %sext, ptr addrspace(1) %gep.out, align 4
   ret void
 }
 
 ; SI-LABEL: {{^}}test_class_lit_constant_dynamic_mask_f64:
 ; SI: v_cmp_class_f64_e32 vcc, s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_lit_constant_dynamic_mask_f64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_class_lit_constant_dynamic_mask_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
-  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %b = load i32, i32 addrspace(1)* %gep.in
+  %gep.in = getelementptr i32, ptr addrspace(1) %in, i32 %tid
+  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+  %b = load i32, ptr addrspace(1) %gep.in
 
   %result = call i1 @llvm.amdgcn.class.f64(double 1024.0, i32 %b) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %gep.out, align 4
+  store i32 %sext, ptr addrspace(1) %gep.out, align 4
   ret void
 }
 
@@ -338,18 +338,18 @@ define amdgpu_kernel void @test_class_lit_constant_dynamic_mask_f64(i32 addrspac
 ; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 3{{$}}
 ; SI-NOT: v_cmp_class
 ; SI: s_endpgm
-define amdgpu_kernel void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fold_or_class_f32_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep.in
+  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep.in
 
   %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
   %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 3) #1
   %or = or i1 %class0, %class1
 
   %sext = sext i1 %or to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -358,11 +358,11 @@ define amdgpu_kernel void @test_fold_or_class_f32_0(i32 addrspace(1)* %out, floa
 ; SI: v_cmp_class_f32_e64 s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}}
 ; SI-NOT: v_cmp_class
 ; SI: s_endpgm
-define amdgpu_kernel void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fold_or3_class_f32_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep.in
+  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep.in
 
   %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
   %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 2) #1
@@ -371,7 +371,7 @@ define amdgpu_kernel void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, flo
   %or.1 = or i1 %or.0, %class2
 
   %sext = sext i1 %or.1 to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -381,11 +381,11 @@ define amdgpu_kernel void @test_fold_or3_class_f32_0(i32 addrspace(1)* %out, flo
 ; SI: v_cmp_class_f32_e64 s[0:1], v{{[0-9]+}}, [[MASK]]{{$}}
 ; SI-NOT: v_cmp_class
 ; SI: s_endpgm
-define amdgpu_kernel void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fold_or_all_tests_class_f32_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep.in
+  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep.in
 
   %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 1) #1
   %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 2) #1
@@ -407,7 +407,7 @@ define amdgpu_kernel void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)*
   %or.7 = or i1 %or.6, %class8
   %or.8 = or i1 %or.7, %class9
   %sext = sext i1 %or.8 to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -416,18 +416,18 @@ define amdgpu_kernel void @test_fold_or_all_tests_class_f32_0(i32 addrspace(1)*
 ; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 12{{$}}
 ; SI-NOT: v_cmp_class
 ; SI: s_endpgm
-define amdgpu_kernel void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fold_or_class_f32_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep.in
+  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep.in
 
   %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1
   %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 8) #1
   %or = or i1 %class0, %class1
 
   %sext = sext i1 %or to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -436,18 +436,18 @@ define amdgpu_kernel void @test_fold_or_class_f32_1(i32 addrspace(1)* %out, floa
 ; SI: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, 7{{$}}
 ; SI-NOT: v_cmp_class
 ; SI: s_endpgm
-define amdgpu_kernel void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, float addrspace(1)* %in) #0 {
+define amdgpu_kernel void @test_fold_or_class_f32_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep.in
+  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep.in
 
   %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 7) #1
   %class1 = call i1 @llvm.amdgcn.class.f32(float %a, i32 7) #1
   %or = or i1 %class0, %class1
 
   %sext = sext i1 %or to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -456,18 +456,18 @@ define amdgpu_kernel void @test_fold_or_class_f32_2(i32 addrspace(1)* %out, floa
 ; SI-DAG: v_cmp_class_f32_e64 {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}, 8{{$}}
 ; SI: s_or_b64
 ; SI: s_endpgm
-define amdgpu_kernel void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, float addrspace(1)* %in, float %b) #0 {
+define amdgpu_kernel void @test_no_fold_or_class_f32_0(ptr addrspace(1) %out, ptr addrspace(1) %in, float %b) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
-  %gep.in = getelementptr float, float addrspace(1)* %in, i32 %tid
-  %gep.out = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
-  %a = load float, float addrspace(1)* %gep.in
+  %gep.in = getelementptr float, ptr addrspace(1) %in, i32 %tid
+  %gep.out = getelementptr i32, ptr addrspace(1) %out, i32 %tid
+  %a = load float, ptr addrspace(1) %gep.in
 
   %class0 = call i1 @llvm.amdgcn.class.f32(float %a, i32 4) #1
   %class1 = call i1 @llvm.amdgcn.class.f32(float %b, i32 8) #1
   %or = or i1 %class0, %class1
 
   %sext = sext i1 %or to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -476,10 +476,10 @@ define amdgpu_kernel void @test_no_fold_or_class_f32_0(i32 addrspace(1)* %out, f
 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_0_f32(i32 addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @test_class_0_f32(ptr addrspace(1) %out, float %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f32(float %a, i32 0) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -488,10 +488,10 @@ define amdgpu_kernel void @test_class_0_f32(i32 addrspace(1)* %out, float %a) #0
 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_0_f64(i32 addrspace(1)* %out, double %a) #0 {
+define amdgpu_kernel void @test_class_0_f64(ptr addrspace(1) %out, double %a) #0 {
   %result = call i1 @llvm.amdgcn.class.f64(double %a, i32 0) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -500,10 +500,10 @@ define amdgpu_kernel void @test_class_0_f64(i32 addrspace(1)* %out, double %a) #
 ; SI: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0
 ; SI: buffer_store_dword [[RESULT]]
 ; SI: s_endpgm
-define amdgpu_kernel void @test_class_undef_f32(i32 addrspace(1)* %out, float %a, i32 %b) #0 {
+define amdgpu_kernel void @test_class_undef_f32(ptr addrspace(1) %out, float %a, i32 %b) #0 {
   %result = call i1 @llvm.amdgcn.class.f32(float undef, i32 %b) #1
   %sext = sext i1 %result to i32
-  store i32 %sext, i32 addrspace(1)* %out, align 4
+  store i32 %sext, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -538,7 +538,7 @@ define i1 @test_fold_and_unord(float %a) {
 ; SI: s_and_b64
 define i1 @test_fold_and_ord_multi_use(float %a) {
   %class = call i1 @llvm.amdgcn.class.f32(float %a, i32 35) #1
-  store volatile i1 %class, i1 addrspace(1)* undef
+  store volatile i1 %class, ptr addrspace(1) undef
   %ord = fcmp ord float %a, %a
   %and = and i1 %ord, %class
   ret i1 %and
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.f16.ll
index 054388607293ad..1f78dd7493e9d1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.f16.ll
@@ -8,11 +8,11 @@ declare half @llvm.amdgcn.cos.f16(half %a)
 ; GCN: buffer_store_short v[[R_F16]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @cos_f16(
-    half addrspace(1)* %r,
-    half addrspace(1)* %a) {
+    ptr addrspace(1) %r,
+    ptr addrspace(1) %a) {
 entry:
-  %a.val = load half, half addrspace(1)* %a
+  %a.val = load half, ptr addrspace(1) %a
   %r.val = call half @llvm.amdgcn.cos.f16(half %a.val)
-  store half %r.val, half addrspace(1)* %r
+  store half %r.val, ptr addrspace(1) %r
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll
index 7209d94d632a6b..68438cee06b3dd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cos.ll
@@ -5,9 +5,9 @@ declare float @llvm.amdgcn.cos.f32(float) #0
 
 ; GCN-LABEL: {{^}}v_cos_f32:
 ; GCN: v_cos_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @v_cos_f32(float addrspace(1)* %out, float %src) #1 {
+define amdgpu_kernel void @v_cos_f32(ptr addrspace(1) %out, float %src) #1 {
   %cos = call float @llvm.amdgcn.cos.f32(float %src) #0
-  store float %cos, float addrspace(1)* %out
+  store float %cos, ptr addrspace(1) %out
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll
index 2d3e3ee0f8e50d..cb71a99f0e2316 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubeid.ll
@@ -5,9 +5,9 @@ declare float @llvm.amdgcn.cubeid(float, float, float) #0
 
 ; GCN-LABEL: {{^}}test_cubeid:
 ; GCN: v_cubeid_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_cubeid(float addrspace(1)* %out, float %a, float %b, float %c) #1 {
+define amdgpu_kernel void @test_cubeid(ptr addrspace(1) %out, float %a, float %b, float %c) #1 {
   %result = call float @llvm.amdgcn.cubeid(float %a, float %b, float %c)
-  store float %result, float addrspace(1)* %out
+  store float %result, ptr addrspace(1) %out
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll
index 48c55e95403375..8ab874295013ec 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubema.ll
@@ -5,9 +5,9 @@ declare float @llvm.amdgcn.cubema(float, float, float) #0
 
 ; GCN-LABEL: {{^}}test_cubema:
 ; GCN: v_cubema_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_cubema(float addrspace(1)* %out, float %a, float %b, float %c) #1 {
+define amdgpu_kernel void @test_cubema(ptr addrspace(1) %out, float %a, float %b, float %c) #1 {
   %result = call float @llvm.amdgcn.cubema(float %a, float %b, float %c)
-  store float %result, float addrspace(1)* %out
+  store float %result, ptr addrspace(1) %out
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll
index 07336462a95e2a..eee7bbdeed4592 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubesc.ll
@@ -5,9 +5,9 @@ declare float @llvm.amdgcn.cubesc(float, float, float) #0
 
 ; GCN-LABEL: {{^}}test_cubesc:
 ; GCN: v_cubesc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_cubesc(float addrspace(1)* %out, float %a, float %b, float %c) #1 {
+define amdgpu_kernel void @test_cubesc(ptr addrspace(1) %out, float %a, float %b, float %c) #1 {
   %result = call float @llvm.amdgcn.cubesc(float %a, float %b, float %c)
-  store float %result, float addrspace(1)* %out
+  store float %result, ptr addrspace(1) %out
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll
index c3a8c9e3f8fd57..28ce72e31e3e48 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cubetc.ll
@@ -5,9 +5,9 @@ declare float @llvm.amdgcn.cubetc(float, float, float) #0
 
 ; GCN-LABEL: {{^}}test_cubetc:
 ; GCN: v_cubetc_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @test_cubetc(float addrspace(1)* %out, float %a, float %b, float %c) #1 {
+define amdgpu_kernel void @test_cubetc(ptr addrspace(1) %out, float %a, float %b, float %c) #1 {
   %result = call float @llvm.amdgcn.cubetc(float %a, float %b, float %c)
-  store float %result, float addrspace(1)* %out
+  store float %result, ptr addrspace(1) %out
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.i16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.i16.ll
index bc9e8efd6f5bf5..70f6bc2f5dcd45 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.i16.ll
@@ -8,20 +8,20 @@
 ; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[#LOAD + 3]]
 ; SI: v_cvt_pk_i16_i32_e32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]]
 ; VI: v_cvt_pk_i16_i32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]]
-define amdgpu_kernel void @s_cvt_pk_i16_i32(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @s_cvt_pk_i16_i32(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
   %result = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %x, i32 %y)
   %r = bitcast <2 x i16> %result to i32
-  store i32 %r, i32 addrspace(1)* %out
+  store i32 %r, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}s_cvt_pk_i16_samereg_i32:
 ; GCN: s_load_dword [[X:s[0-9]+]]
 ; GCN: v_cvt_pk_i16_i32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
-define amdgpu_kernel void @s_cvt_pk_i16_samereg_i32(i32 addrspace(1)* %out, i32 %x) #0 {
+define amdgpu_kernel void @s_cvt_pk_i16_samereg_i32(ptr addrspace(1) %out, i32 %x) #0 {
   %result = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %x, i32 %x)
   %r = bitcast <2 x i16> %result to i32
-  store i32 %r, i32 addrspace(1)* %out
+  store i32 %r, ptr addrspace(1) %out
   ret void
 }
 
@@ -30,32 +30,32 @@ define amdgpu_kernel void @s_cvt_pk_i16_samereg_i32(i32 addrspace(1)* %out, i32
 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
 ; SI: v_cvt_pk_i16_i32_e32 v{{[0-9]+}}, [[A]], [[B]]
 ; VI: v_cvt_pk_i16_i32 v{{[0-9]+}}, [[A]], [[B]]
-define amdgpu_kernel void @v_cvt_pk_i16_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pk_i16_i32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile i32, i32 addrspace(1)* %a.gep
-  %b = load volatile i32, i32 addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds i32, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile i32, ptr addrspace(1) %a.gep
+  %b = load volatile i32, ptr addrspace(1) %b.gep
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %a, i32 %b)
   %r = bitcast <2 x i16> %cvt to i32
-  store i32 %r, i32 addrspace(1)* %out.gep
+  store i32 %r, ptr addrspace(1) %out.gep
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_cvt_pk_i16_i32_reg_imm:
 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_cvt_pk_i16_i32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1
-define amdgpu_kernel void @v_cvt_pk_i16_i32_reg_imm(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pk_i16_i32_reg_imm(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile i32, i32 addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile i32, ptr addrspace(1) %a.gep
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 %a, i32 1)
   %r = bitcast <2 x i16> %cvt to i32
-  store i32 %r, i32 addrspace(1)* %out.gep
+  store i32 %r, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -63,15 +63,15 @@ define amdgpu_kernel void @v_cvt_pk_i16_i32_reg_imm(i32 addrspace(1)* %out, i32
 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; SI: v_cvt_pk_i16_i32_e32 v{{[0-9]+}}, 1, [[A]]
 ; VI: v_cvt_pk_i16_i32 v{{[0-9]+}}, 1, [[A]]
-define amdgpu_kernel void @v_cvt_pk_i16_i32_imm_reg(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pk_i16_i32_imm_reg(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile i32, i32 addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile i32, ptr addrspace(1) %a.gep
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.i16(i32 1, i32 %a)
   %r = bitcast <2 x i16> %cvt to i32
-  store i32 %r, i32 addrspace(1)* %out.gep
+  store i32 %r, ptr addrspace(1) %out.gep
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.u16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.u16.ll
index 817eb1089e335a..59c19d9341091b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.u16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pk.u16.ll
@@ -8,20 +8,20 @@
 ; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[#LOAD + 3]]
 ; SI: v_cvt_pk_u16_u32_e32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]]
 ; VI: v_cvt_pk_u16_u32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]]
-define amdgpu_kernel void @s_cvt_pk_u16_u32(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
+define amdgpu_kernel void @s_cvt_pk_u16_u32(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
   %result = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %x, i32 %y)
   %r = bitcast <2 x i16> %result to i32
-  store i32 %r, i32 addrspace(1)* %out
+  store i32 %r, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}s_cvt_pk_u16_samereg_i32:
 ; GCN: s_load_dword [[X:s[0-9]+]]
 ; GCN: v_cvt_pk_u16_u32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
-define amdgpu_kernel void @s_cvt_pk_u16_samereg_i32(i32 addrspace(1)* %out, i32 %x) #0 {
+define amdgpu_kernel void @s_cvt_pk_u16_samereg_i32(ptr addrspace(1) %out, i32 %x) #0 {
   %result = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %x, i32 %x)
   %r = bitcast <2 x i16> %result to i32
-  store i32 %r, i32 addrspace(1)* %out
+  store i32 %r, ptr addrspace(1) %out
   ret void
 }
 
@@ -30,32 +30,32 @@ define amdgpu_kernel void @s_cvt_pk_u16_samereg_i32(i32 addrspace(1)* %out, i32
 ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]]
 ; SI: v_cvt_pk_u16_u32_e32 v{{[0-9]+}}, [[A]], [[B]]
 ; VI: v_cvt_pk_u16_u32 v{{[0-9]+}}, [[A]], [[B]]
-define amdgpu_kernel void @v_cvt_pk_u16_u32(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr, i32 addrspace(1)* %b.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pk_u16_u32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i64 %tid.ext
-  %b.gep = getelementptr inbounds i32, i32 addrspace(1)* %b.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile i32, i32 addrspace(1)* %a.gep
-  %b = load volatile i32, i32 addrspace(1)* %b.gep
+  %a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %b.gep = getelementptr inbounds i32, ptr addrspace(1) %b.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile i32, ptr addrspace(1) %a.gep
+  %b = load volatile i32, ptr addrspace(1) %b.gep
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %a, i32 %b)
   %r = bitcast <2 x i16> %cvt to i32
-  store i32 %r, i32 addrspace(1)* %out.gep
+  store i32 %r, ptr addrspace(1) %out.gep
   ret void
 }
 
 ; GCN-LABEL: {{^}}v_cvt_pk_u16_u32_reg_imm:
 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; GCN: v_cvt_pk_u16_u32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1
-define amdgpu_kernel void @v_cvt_pk_u16_u32_reg_imm(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pk_u16_u32_reg_imm(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile i32, i32 addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile i32, ptr addrspace(1) %a.gep
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 %a, i32 1)
   %r = bitcast <2 x i16> %cvt to i32
-  store i32 %r, i32 addrspace(1)* %out.gep
+  store i32 %r, ptr addrspace(1) %out.gep
   ret void
 }
 
@@ -63,15 +63,15 @@ define amdgpu_kernel void @v_cvt_pk_u16_u32_reg_imm(i32 addrspace(1)* %out, i32
 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 ; SI: v_cvt_pk_u16_u32_e32 v{{[0-9]+}}, 1, [[A]]
 ; VI: v_cvt_pk_u16_u32 v{{[0-9]+}}, 1, [[A]]
-define amdgpu_kernel void @v_cvt_pk_u16_u32_imm_reg(i32 addrspace(1)* %out, i32 addrspace(1)* %a.ptr) #0 {
+define amdgpu_kernel void @v_cvt_pk_u16_u32_imm_reg(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
-  %a.gep = getelementptr inbounds i32, i32 addrspace(1)* %a.ptr, i64 %tid.ext
-  %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext
-  %a = load volatile i32, i32 addrspace(1)* %a.gep
+  %a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr, i64 %tid.ext
+  %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext
+  %a = load volatile i32, ptr addrspace(1) %a.gep
   %cvt = call <2 x i16> @llvm.amdgcn.cvt.pk.u16(i32 1, i32 %a)
   %r = bitcast <2 x i16> %cvt to i32
-  store i32 %r, i32 addrspace(1)* %out.gep
+  store i32 %r, ptr addrspace(1) %out.gep
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.i16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.i16.ll
index 25980e29a9b8fc..aef844984773f8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.i16.ll
@@ -8,20 +8,20 @@
 ; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[#LOAD + 3]]
 ; SI: v_cvt_pknorm_i16_f32_e32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]]
 ; VI: v_cvt_pknorm_i16_f32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]]
-define amdgpu_kernel void @s_cvt_pknorm_i16_f32(i32 addrspace(1)* %out, float %x, float %y) #0 {
+define amdgpu_kernel void @s_cvt_pknorm_i16_f32(ptr addrspace(1) %out, float %x, float %y) #0 {
   %result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %x, float %y)
   %r = bitcast <2 x i16> %result to i32
-  store i32 %r, i32 addrspace(1)* %out
+  store i32 %r, ptr addrspace(1) %out
   ret void
 }
 
 ; GCN-LABEL: {{^}}s_cvt_pknorm_i16_samereg_f32:
 ; GCN: s_load_dword [[X:s[0-9]+]]
 ; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]]
-define amdgpu_kernel void @s_cvt_pknorm_i16_samereg_f32(i32 addrspace(1)* %out, float %x) #0 {
+define amdgpu_kernel void @s_cvt_pknorm_i16_samereg_f32(ptr
addrspace(1) %out, float %x) #0 { %result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %x, float %x) %r = bitcast <2 x i16> %result to i32 - store i32 %r, i32 addrspace(1)* %out + store i32 %r, ptr addrspace(1) %out ret void } @@ -30,32 +30,32 @@ define amdgpu_kernel void @s_cvt_pknorm_i16_samereg_f32(i32 addrspace(1)* %out, ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; SI: v_cvt_pknorm_i16_f32_e32 v{{[0-9]+}}, [[A]], [[B]] ; VI: v_cvt_pknorm_i16_f32 v{{[0-9]+}}, [[A]], [[B]] -define amdgpu_kernel void @v_cvt_pknorm_i16_f32(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_cvt_pknorm_i16_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep - %b = load volatile float, float addrspace(1)* %b.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %a, float %b) %r = bitcast <2 x i16> %cvt to i32 - store i32 %r, i32 addrspace(1)* %out.gep + store i32 %r, ptr addrspace(1) %out.gep ret void } ; GCN-LABEL: {{^}}v_cvt_pknorm_i16_f32_reg_imm: ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1.0 -define amdgpu_kernel void @v_cvt_pknorm_i16_f32_reg_imm(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { +define amdgpu_kernel void @v_cvt_pknorm_i16_f32_reg_imm(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %a, float 1.0) %r = bitcast <2 x i16> %cvt to i32 - store i32 %r, i32 addrspace(1)* %out.gep + store i32 %r, ptr addrspace(1) %out.gep ret void } @@ -63,15 +63,15 @@ define amdgpu_kernel void @v_cvt_pknorm_i16_f32_reg_imm(i32 addrspace(1)* %out, ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; SI: v_cvt_pknorm_i16_f32_e32 v{{[0-9]+}}, 1.0, [[A]] ; VI: v_cvt_pknorm_i16_f32 v{{[0-9]+}}, 1.0, [[A]] -define amdgpu_kernel void @v_cvt_pknorm_i16_f32_imm_reg(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { +define amdgpu_kernel void @v_cvt_pknorm_i16_f32_imm_reg(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %a = load 
volatile float, float addrspace(1)* %a.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float 1.0, float %a) %r = bitcast <2 x i16> %cvt to i32 - store i32 %r, i32 addrspace(1)* %out.gep + store i32 %r, ptr addrspace(1) %out.gep ret void } @@ -79,18 +79,18 @@ define amdgpu_kernel void @v_cvt_pknorm_i16_f32_imm_reg(i32 addrspace(1)* %out, ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], [[B]] -define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_lo(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_lo(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep - %b = load volatile float, float addrspace(1)* %b.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep %neg.a = fsub float -0.0, %a %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %neg.a, float %b) %r = bitcast <2 x i16> %cvt to i32 - store i32 %r, i32 addrspace(1)* %out.gep + store i32 %r, ptr addrspace(1) %out.gep ret void } @@ -98,18 +98,18 @@ define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_lo(i32 addrspace(1)* %out, ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], -[[B]] -define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep - %b = load volatile float, float addrspace(1)* %b.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep %neg.b = fsub float -0.0, %b %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %a, float %neg.b) %r = bitcast <2 x i16> %cvt to i32 - store i32 %r, i32 addrspace(1)* %out.gep + store i32 %r, ptr addrspace(1) %out.gep 
ret void } @@ -117,19 +117,19 @@ define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_hi(i32 addrspace(1)* %out, ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], -[[B]] -define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_lo_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_lo_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep - %b = load volatile float, float addrspace(1)* %b.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep %neg.a = fsub float -0.0, %a %neg.b = fsub float -0.0, %b %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %neg.a, float %neg.b) %r = bitcast <2 x i16> %cvt to i32 - store i32 %r, i32 addrspace(1)* %out.gep + store i32 %r, ptr addrspace(1) %out.gep ret void } @@ -137,20 +137,20 @@ define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_lo_hi(i32 addrspace(1)* %ou ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; GCN: v_cvt_pknorm_i16_f32{{(_e64)*}} v{{[0-9]+}}, -|[[A]]|, -[[B]] -define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_fabs_lo_fneg_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_cvt_pknorm_i16_f32_fneg_fabs_lo_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep - %b = load volatile float, float addrspace(1)* %b.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep %fabs.a = call float @llvm.fabs.f32(float %a) %neg.fabs.a = fsub float -0.0, %fabs.a %neg.b = fsub float -0.0, %b %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.i16(float %neg.fabs.a, float %neg.b) %r = bitcast <2 x i16> %cvt to i32 - store i32 %r, i32 addrspace(1)* %out.gep + store i32 %r, ptr addrspace(1) %out.gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll index 8f9a49438f6c4a..21f104bcc89a01 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll +++ 
b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pknorm.u16.ll @@ -8,20 +8,20 @@ ; GCN: v_mov_b32_e32 [[VY:v[0-9]+]], s[[#LOAD + 3]] ; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]] ; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, s[[#LOAD + 2]], [[VY]] -define amdgpu_kernel void @s_cvt_pknorm_u16_f32(i32 addrspace(1)* %out, float %x, float %y) #0 { +define amdgpu_kernel void @s_cvt_pknorm_u16_f32(ptr addrspace(1) %out, float %x, float %y) #0 { %result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float %y) %r = bitcast <2 x i16> %result to i32 - store i32 %r, i32 addrspace(1)* %out + store i32 %r, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}s_cvt_pknorm_u16_samereg_f32: ; GCN: s_load_dword [[X:s[0-9]+]] ; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, [[X]], [[X]] -define amdgpu_kernel void @s_cvt_pknorm_u16_samereg_f32(i32 addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @s_cvt_pknorm_u16_samereg_f32(ptr addrspace(1) %out, float %x) #0 { %result = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %x, float %x) %r = bitcast <2 x i16> %result to i32 - store i32 %r, i32 addrspace(1)* %out + store i32 %r, ptr addrspace(1) %out ret void } @@ -30,32 +30,32 @@ define amdgpu_kernel void @s_cvt_pknorm_u16_samereg_f32(i32 addrspace(1)* %out, ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, [[A]], [[B]] ; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, [[A]], [[B]] -define amdgpu_kernel void @v_cvt_pknorm_u16_f32(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_cvt_pknorm_u16_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep - %b = load volatile float, float addrspace(1)* %b.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %a, float %b) %r = bitcast <2 x i16> %cvt to i32 - store i32 %r, i32 addrspace(1)* %out.gep + store i32 %r, ptr addrspace(1) %out.gep ret void } ; GCN-LABEL: {{^}}v_cvt_pknorm_u16_f32_reg_imm: ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], 1.0 -define amdgpu_kernel void @v_cvt_pknorm_u16_f32_reg_imm(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { +define amdgpu_kernel void @v_cvt_pknorm_u16_f32_reg_imm(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr 
addrspace(1) %a.gep %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %a, float 1.0) %r = bitcast <2 x i16> %cvt to i32 - store i32 %r, i32 addrspace(1)* %out.gep + store i32 %r, ptr addrspace(1) %out.gep ret void } @@ -63,15 +63,15 @@ define amdgpu_kernel void @v_cvt_pknorm_u16_f32_reg_imm(i32 addrspace(1)* %out, ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; SI: v_cvt_pknorm_u16_f32_e32 v{{[0-9]+}}, 1.0, [[A]] ; VI: v_cvt_pknorm_u16_f32 v{{[0-9]+}}, 1.0, [[A]] -define amdgpu_kernel void @v_cvt_pknorm_u16_f32_imm_reg(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { +define amdgpu_kernel void @v_cvt_pknorm_u16_f32_imm_reg(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float 1.0, float %a) %r = bitcast <2 x i16> %cvt to i32 - store i32 %r, i32 addrspace(1)* %out.gep + store i32 %r, ptr addrspace(1) %out.gep ret void } @@ -79,18 +79,18 @@ define amdgpu_kernel void @v_cvt_pknorm_u16_f32_imm_reg(i32 addrspace(1)* %out, ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], [[B]] -define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep - %b = load volatile float, float addrspace(1)* %b.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep %neg.a = fsub float -0.0, %a %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %neg.a, float %b) %r = bitcast <2 x i16> %cvt to i32 - store i32 %r, i32 addrspace(1)* %out.gep + store i32 %r, ptr addrspace(1) %out.gep ret void } @@ -98,18 +98,18 @@ define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo(i32 addrspace(1)* %out, ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, [[A]], -[[B]] -define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 
@llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep - %b = load volatile float, float addrspace(1)* %b.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep %neg.b = fsub float -0.0, %b %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %a, float %neg.b) %r = bitcast <2 x i16> %cvt to i32 - store i32 %r, i32 addrspace(1)* %out.gep + store i32 %r, ptr addrspace(1) %out.gep ret void } @@ -117,19 +117,19 @@ define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_hi(i32 addrspace(1)* %out, ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, -[[A]], -[[B]] -define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep - %b = load volatile float, float addrspace(1)* %b.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep %neg.a = fsub float -0.0, %a %neg.b = fsub float -0.0, %b %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %neg.a, float %neg.b) %r = bitcast <2 x i16> %cvt to i32 - store i32 %r, i32 addrspace(1)* %out.gep + store i32 %r, ptr addrspace(1) %out.gep ret void } @@ -137,20 +137,20 @@ define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_lo_hi(i32 addrspace(1)* %ou ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat|global}}_load_dword [[B:v[0-9]+]] ; GCN: v_cvt_pknorm_u16_f32{{(_e64)*}} v{{[0-9]+}}, -|[[A]]|, -[[B]] -define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_fabs_lo_fneg_hi(i32 addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_cvt_pknorm_u16_f32_fneg_fabs_lo_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep - %b = load 
volatile float, float addrspace(1)* %b.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds i32, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep %fabs.a = call float @llvm.fabs.f32(float %a) %neg.fabs.a = fsub float -0.0, %fabs.a %neg.b = fsub float -0.0, %b %cvt = call <2 x i16> @llvm.amdgcn.cvt.pknorm.u16(float %neg.fabs.a, float %neg.b) %r = bitcast <2 x i16> %cvt to i32 - store i32 %r, i32 addrspace(1)* %out.gep + store i32 %r, ptr addrspace(1) %out.gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll index 64e442a0ebf532..bc756b4851f2bd 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.pkrtz.ll @@ -5,7 +5,7 @@ ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GFX10 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -enable-var-scope -check-prefixes=GFX11 -define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float %x, float %y) #0 { +define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(ptr addrspace(1) %out, float %x, float %y) #0 { ; SI-LABEL: s_cvt_pkrtz_v2f16_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -59,11 +59,11 @@ define amdgpu_kernel void @s_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y) - store <2 x half> %result, <2 x half> addrspace(1)* %out + store <2 x half> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @s_cvt_pkrtz_samereg_v2f16_f32(<2 x half> addrspace(1)* %out, float %x) #0 { +define amdgpu_kernel void @s_cvt_pkrtz_samereg_v2f16_f32(ptr addrspace(1) %out, float %x) #0 { ; SI-LABEL: s_cvt_pkrtz_samereg_v2f16_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dword s4, s[0:1], 0xb @@ -119,11 +119,11 @@ define amdgpu_kernel void @s_cvt_pkrtz_samereg_v2f16_f32(<2 x half> addrspace(1) ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %x) - store <2 x half> %result, <2 x half> addrspace(1)* %out + store <2 x half> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @s_cvt_pkrtz_undef_undef(<2 x half> addrspace(1)* %out) #0 { +define amdgpu_kernel void @s_cvt_pkrtz_undef_undef(ptr addrspace(1) %out) #0 { ; GCN-LABEL: s_cvt_pkrtz_undef_undef: ; GCN: ; %bb.0: ; GCN-NEXT: s_endpgm @@ -136,11 +136,11 @@ define amdgpu_kernel void @s_cvt_pkrtz_undef_undef(<2 x half> addrspace(1)* %out ; GFX11: ; %bb.0: ; GFX11-NEXT: s_endpgm %result = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef) - store <2 x half> %result, <2 x half> addrspace(1)* %out + store <2 x half> %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { ; SI-LABEL: v_cvt_pkrtz_v2f16_f32: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -230,17 +230,17 @@ define amdgpu_kernel 
void @v_cvt_pkrtz_v2f16_f32(<2 x half> addrspace(1)* %out, ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep - %b = load volatile float, float addrspace(1)* %b.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float %b) - store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep + store <2 x half> %cvt, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { +define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 { ; SI-LABEL: v_cvt_pkrtz_v2f16_f32_reg_imm: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -309,15 +309,15 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_reg_imm(<2 x half> addrspace(1) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float 1.0) - store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep + store <2 x half> %cvt, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { +define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr) #0 { ; SI-LABEL: v_cvt_pkrtz_v2f16_f32_imm_reg: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 @@ -386,15 +386,15 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_imm_reg(<2 x half> addrspace(1) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 1.0, float %a) - store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep + store <2 x half> %cvt, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { 
+define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { ; SI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_lo: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -484,18 +484,18 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo(<2 x half> addrspace(1) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep - %b = load volatile float, float addrspace(1)* %b.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep %neg.a = fsub float -0.0, %a %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %neg.a, float %b) - store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep + store <2 x half> %cvt, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { ; SI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_hi: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -585,18 +585,18 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_hi(<2 x half> addrspace(1) ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep - %b = load volatile float, float addrspace(1)* %b.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep %neg.b = fsub float -0.0, %b %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %a, float %neg.b) - store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep + store <2 x half> %cvt, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { ; SI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_lo_hi: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -686,19 +686,19 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_lo_hi(<2 x half> addrspace ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float 
addrspace(1)* %a.ptr, i64 %tid.ext - %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep - %b = load volatile float, float addrspace(1)* %b.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep %neg.a = fsub float -0.0, %a %neg.b = fsub float -0.0, %b %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %neg.a, float %neg.b) - store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep + store <2 x half> %cvt, ptr addrspace(1) %out.gep ret void } -define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(<2 x half> addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 { +define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 { ; SI-LABEL: v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi: ; SI: ; %bb.0: ; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 @@ -788,16 +788,16 @@ define amdgpu_kernel void @v_cvt_pkrtz_v2f16_f32_fneg_fabs_lo_fneg_hi(<2 x half> ; GFX11-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 - %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext - %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext - %out.gep = getelementptr inbounds <2 x half>, <2 x half> addrspace(1)* %out, i64 %tid.ext - %a = load volatile float, float addrspace(1)* %a.gep - %b = load volatile float, float addrspace(1)* %b.gep + %a.gep = getelementptr inbounds float, ptr addrspace(1) %a.ptr, i64 %tid.ext + %b.gep = getelementptr inbounds float, ptr addrspace(1) %b.ptr, i64 %tid.ext + %out.gep = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i64 %tid.ext + %a = load volatile float, ptr addrspace(1) %a.gep + %b = load volatile float, ptr addrspace(1) %b.gep %fabs.a = call float @llvm.fabs.f32(float %a) %neg.fabs.a = fsub float -0.0, %fabs.a %neg.b = fsub float -0.0, %b %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %neg.fabs.a, float %neg.b) - store <2 x half> %cvt, <2 x half> addrspace(1)* %out.gep + store <2 x half> %cvt, ptr addrspace(1) %out.gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll index 5279f8e997198e..c4c7ce0e8efa39 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.id.ll @@ -9,9 +9,9 @@ declare i64 @llvm.amdgcn.dispatch.id() #1 ; GCN-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], s6 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], s7 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[LO]]:[[HI]]] -define amdgpu_kernel void @dispatch_id(i64 addrspace(1)* %out) #0 { +define amdgpu_kernel void @dispatch_id(ptr addrspace(1) %out) #0 { %tmp0 = call i64 @llvm.amdgcn.dispatch.id() - store i64 %tmp0, i64 addrspace(1)* %out + store i64 %tmp0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll index 42826b7466f970..06500adbe365c1 100644 --- 
a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll @@ -6,11 +6,10 @@ ; GCN-LABEL: {{^}}test: ; GCN: enable_sgpr_dispatch_ptr = 1 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0 -define amdgpu_kernel void @test(i32 addrspace(1)* %out) { - %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 - %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)* - %value = load i32, i32 addrspace(4)* %header_ptr - store i32 %value, i32 addrspace(1)* %out +define amdgpu_kernel void @test(ptr addrspace(1) %out) { + %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 + %value = load i32, ptr addrspace(4) %dispatch_ptr + store i32 %value, ptr addrspace(1) %out ret void } @@ -20,16 +19,15 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out) { ; GCN: s_lshr_b32 s{{[0-9]+}}, s[[REG]], 16 ; GCN-NOT: load_ushort ; GCN: s_endpgm -define amdgpu_kernel void @test2(i32 addrspace(1)* %out) { - %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 - %d1 = getelementptr inbounds i8, i8 addrspace(4)* %dispatch_ptr, i64 6 - %h1 = bitcast i8 addrspace(4)* %d1 to i16 addrspace(4)* - %v1 = load i16, i16 addrspace(4)* %h1 +define amdgpu_kernel void @test2(ptr addrspace(1) %out) { + %dispatch_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 + %d1 = getelementptr inbounds i8, ptr addrspace(4) %dispatch_ptr, i64 6 + %v1 = load i16, ptr addrspace(4) %d1 %e1 = zext i16 %v1 to i32 - store i32 %e1, i32 addrspace(1)* %out + store i32 %e1, ptr addrspace(1) %out ret void } -declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 +declare noalias ptr addrspace(4) @llvm.amdgcn.dispatch.ptr() #0 attributes #0 = { readnone } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll index 7b36521f15b401..b7d1d4e6e58230 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.f16.ll @@ -10,16 +10,16 @@ declare half @llvm.amdgcn.div.fixup.f16(half %a, half %b, half %c) ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @div_fixup_f16( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b, - half addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { entry: - %a.val = load volatile half, half addrspace(1)* %a - %b.val = load volatile half, half addrspace(1)* %b - %c.val = load volatile half, half addrspace(1)* %c + %a.val = load volatile half, ptr addrspace(1) %a + %b.val = load volatile half, ptr addrspace(1) %b + %c.val = load volatile half, ptr addrspace(1) %c %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half %b.val, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -31,14 +31,14 @@ entry: ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @div_fixup_f16_imm_a( - half addrspace(1)* %r, - half addrspace(1)* %b, - half addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { entry: - %b.val = load volatile half, half addrspace(1)* %b - %c.val = load volatile half, half addrspace(1)* %c + %b.val = load volatile half, ptr addrspace(1) %b + %c.val = load volatile half, ptr addrspace(1) %c %r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half %b.val, half %c.val) - store half %r.val, half 
addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -50,14 +50,14 @@ entry: ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @div_fixup_f16_imm_b( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %c) { entry: - %a.val = load volatile half, half addrspace(1)* %a - %c.val = load volatile half, half addrspace(1)* %c + %a.val = load volatile half, ptr addrspace(1) %a + %c.val = load volatile half, ptr addrspace(1) %c %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half 3.0, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -69,14 +69,14 @@ entry: ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @div_fixup_f16_imm_c( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) { entry: - %a.val = load volatile half, half addrspace(1)* %a - %b.val = load volatile half, half addrspace(1)* %b + %a.val = load volatile half, ptr addrspace(1) %a + %b.val = load volatile half, ptr addrspace(1) %b %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half %b.val, half 3.0) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -87,12 +87,12 @@ entry: ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @div_fixup_f16_imm_a_imm_b( - half addrspace(1)* %r, - half addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %c) { entry: - %c.val = load volatile half, half addrspace(1)* %c + %c.val = load volatile half, ptr addrspace(1) %c %r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half 3.0, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -103,12 +103,12 @@ entry: ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @div_fixup_f16_imm_b_imm_c( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.amdgcn.div.fixup.f16(half %a.val, half 3.0, half 3.0) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -119,11 +119,11 @@ entry: ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @div_fixup_f16_imm_a_imm_c( - half addrspace(1)* %r, - half addrspace(1)* %b) { + ptr addrspace(1) %r, + ptr addrspace(1) %b) { entry: - %b.val = load half, half addrspace(1)* %b + %b.val = load half, ptr addrspace(1) %b %r.val = call half @llvm.amdgcn.div.fixup.f16(half 3.0, half %b.val, half 3.0) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll index 4d9921fd7c3e9c..1f58f18c71e4ae 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fixup.ll @@ -18,16 +18,16 @@ declare double @llvm.amdgcn.div.fixup.f64(double, double, double) nounwind readn ; GCN: v_div_fixup_f32 [[RESULT:v[0-9]+]], [[SA]], [[VB]], [[VC]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm -define amdgpu_kernel void @test_div_fixup_f32(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float 
%c) nounwind { +define amdgpu_kernel void @test_div_fixup_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) nounwind { %result = call float @llvm.amdgcn.div.fixup.f32(float %a, float %b, float %c) nounwind readnone - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } ; GCN-LABEL: {{^}}test_div_fixup_f64: ; GCN: v_div_fixup_f64 -define amdgpu_kernel void @test_div_fixup_f64(double addrspace(1)* %out, double %a, double %b, double %c) nounwind { +define amdgpu_kernel void @test_div_fixup_f64(ptr addrspace(1) %out, double %a, double %b, double %c) nounwind { %result = call double @llvm.amdgcn.div.fixup.f64(double %a, double %b, double %c) nounwind readnone - store double %result, double addrspace(1)* %out, align 8 + store double %result, ptr addrspace(1) %out, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll index 113f121b2519cf..22b801c0902b69 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.fmas.ll @@ -23,9 +23,9 @@ declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) nounwind re ; GCN-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]] ; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VA]], [[VB]], [[VC]] ; GCN: buffer_store_dword [[RESULT]], -define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) nounwind { +define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) nounwind { %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d) nounwind readnone - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } @@ -36,9 +36,9 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32] ; SI-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]] ; SI: v_div_fmas_f32 [[RESULT:v[0-9]+]], 1.0, [[VB]], [[VC]] ; SI: buffer_store_dword [[RESULT]], -define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) nounwind { +define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) nounwind { %result = call float @llvm.amdgcn.div.fmas.f32(float 1.0, float %b, float %c, i1 %d) nounwind readnone - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } @@ -53,9 +53,9 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %o ; GCN-DAG: v_mov_b32_e32 [[VA:v[0-9]+]], [[SA]] ; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VA]], 1.0, [[VC]] ; GCN: buffer_store_dword [[RESULT]], -define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a, float %b, float %c, [8 x i32], i1 %d) nounwind { +define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, float %a, float %b, float %c, [8 x i32], i1 %d) nounwind { %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float 1.0, float %c, i1 %d) nounwind readnone - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } @@ -70,45 +70,45 
+70,45 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %o
 ; GCN-DAG: v_mov_b32_e32 [[VB:v[0-9]+]], [[SB]]
 ; GCN: v_div_fmas_f32 [[RESULT:v[0-9]+]], [[VA]], [[VB]], 1.0
 ; GCN: buffer_store_dword [[RESULT]],
-define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) nounwind {
+define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) nounwind {
   %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float 1.0, i1 %d) nounwind readnone
-  store float %result, float addrspace(1)* %out, align 4
+  store float %result, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_div_fmas_f64:
 ; GCN: v_div_fmas_f64
-define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) nounwind {
+define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, double %b, double %c, i1 %d) nounwind {
   %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d) nounwind readnone
-  store double %result, double addrspace(1)* %out, align 8
+  store double %result, ptr addrspace(1) %out, align 8
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_div_fmas_f32_cond_to_vcc:
 ; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0{{$}}
 ; GCN: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) nounwind {
+define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %i) nounwind {
   %cmp = icmp eq i32 %i, 0
   %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cmp) nounwind readnone
-  store float %result, float addrspace(1)* %out, align 4
+  store float %result, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_div_fmas_f32_imm_false_cond_to_vcc:
 ; GCN: s_mov_b64 vcc, 0
 ; GCN: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
+define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace(1) %out, float %a, float %b, float %c) nounwind {
   %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 false) nounwind readnone
-  store float %result, float addrspace(1)* %out, align 4
+  store float %result, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; GCN-LABEL: {{^}}test_div_fmas_f32_imm_true_cond_to_vcc:
 ; GCN: s_mov_b64 vcc, -1
 ; GCN: v_div_fmas_f32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c) nounwind {
+define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace(1) %out, float %a, float %b, float %c) nounwind {
   %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 true) nounwind readnone
-  store float %result, float addrspace(1)* %out, align 4
+  store float %result, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -123,23 +123,23 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac
 ; SI: s_and_b64 vcc, [[CMP0]], [[CMP1]]
 ; SI: v_div_fmas_f32 {{v[0-9]+}}, [[A]], [[B]], [[C]]
 ; SI: s_endpgm
-define amdgpu_kernel void 
@test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 %d) nounwind { +define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %d) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1 - %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2 - %gep.out = getelementptr float, float addrspace(1)* %out, i32 2 + %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.b = getelementptr float, ptr addrspace(1) %gep.a, i32 1 + %gep.c = getelementptr float, ptr addrspace(1) %gep.a, i32 2 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 2 - %a = load volatile float, float addrspace(1)* %gep.a - %b = load volatile float, float addrspace(1)* %gep.b - %c = load volatile float, float addrspace(1)* %gep.c + %a = load volatile float, ptr addrspace(1) %gep.a + %b = load volatile float, ptr addrspace(1) %gep.b + %c = load volatile float, ptr addrspace(1) %gep.c %cmp0 = icmp eq i32 %tid, 0 %cmp1 = icmp ne i32 %d, 0 %and = and i1 %cmp0, %cmp1 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %and) nounwind readnone - store float %result, float addrspace(1)* %gep.out, align 4 + store float %result, ptr addrspace(1) %gep.out, align 4 ret void } @@ -162,29 +162,29 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace ; SI: buffer_store_dword ; SI: s_endpgm -define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, float addrspace(1)* %in, i32 addrspace(1)* %dummy) nounwind { +define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, ptr addrspace(1) %in, ptr addrspace(1) %dummy) nounwind { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.out = getelementptr float, float addrspace(1)* %out, i32 2 - %gep.a = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.b = getelementptr float, float addrspace(1)* %gep.a, i32 1 - %gep.c = getelementptr float, float addrspace(1)* %gep.a, i32 2 + %gep.out = getelementptr float, ptr addrspace(1) %out, i32 2 + %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.b = getelementptr float, ptr addrspace(1) %gep.a, i32 1 + %gep.c = getelementptr float, ptr addrspace(1) %gep.a, i32 2 - %a = load float, float addrspace(1)* %gep.a - %b = load float, float addrspace(1)* %gep.b - %c = load float, float addrspace(1)* %gep.c + %a = load float, ptr addrspace(1) %gep.a + %b = load float, ptr addrspace(1) %gep.b + %c = load float, ptr addrspace(1) %gep.c %cmp0 = icmp eq i32 %tid, 0 br i1 %cmp0, label %bb, label %exit bb: - %val = load i32, i32 addrspace(1)* %dummy + %val = load i32, ptr addrspace(1) %dummy %cmp1 = icmp ne i32 %val, 0 br label %exit exit: %cond = phi i1 [false, %entry], [%cmp1, %bb] %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cond) nounwind readnone - store float %result, float addrspace(1)* %gep.out, align 4 + store float %result, ptr addrspace(1) %gep.out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll index 8f3d77b3ff7fdb..d8e1da4893c797 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll @@ -11,17 +11,17 @@ declare float @llvm.fabs.f32(float) #1 ; SI: 
v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f32_1(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { +define amdgpu_kernel void @test_div_scale_f32_1(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, ptr addrspace(1) %gep.0, align 4 + %b = load volatile float, ptr addrspace(1) %gep.1, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } @@ -31,17 +31,17 @@ define amdgpu_kernel void @test_div_scale_f32_1(float addrspace(1)* %out, float ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f32_2(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { +define amdgpu_kernel void @test_div_scale_f32_2(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, ptr addrspace(1) %gep.0, align 4 + %b = load volatile float, ptr addrspace(1) %gep.1, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } @@ -51,17 +51,17 @@ define amdgpu_kernel void @test_div_scale_f32_2(float addrspace(1)* %out, float ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]] ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f64_1(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind { +define amdgpu_kernel void @test_div_scale_f64_1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile double, double addrspace(1)* %gep.0, align 8 - %b = load volatile double, double addrspace(1)* %gep.1, align 8 + %a = load volatile double, ptr addrspace(1) 
%gep.0, align 8 + %b = load volatile double, ptr addrspace(1) %gep.1, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } @@ -71,17 +71,17 @@ define amdgpu_kernel void @test_div_scale_f64_1(double addrspace(1)* %out, doubl ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]] ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f64_2(double addrspace(1)* %out, double addrspace(1)* %aptr, double addrspace(1)* %in) nounwind { +define amdgpu_kernel void @test_div_scale_f64_2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr double, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr double, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile double, double addrspace(1)* %gep.0, align 8 - %b = load volatile double, double addrspace(1)* %gep.1, align 8 + %a = load volatile double, ptr addrspace(1) %gep.0, align 8 + %b = load volatile double, ptr addrspace(1) %gep.1, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } @@ -91,15 +91,15 @@ define amdgpu_kernel void @test_div_scale_f64_2(double addrspace(1)* %out, doubl ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f32_scalar_num_1(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind { +define amdgpu_kernel void @test_div_scale_f32_scalar_num_1(ptr addrspace(1) %out, ptr addrspace(1) %in, float %a) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid - %b = load float, float addrspace(1)* %gep, align 4 + %b = load float, ptr addrspace(1) %gep, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } @@ -109,15 +109,15 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_num_1(float addrspace(1)* % ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f32_scalar_num_2(float addrspace(1)* %out, float addrspace(1)* %in, float %a) nounwind { +define amdgpu_kernel void @test_div_scale_f32_scalar_num_2(ptr addrspace(1) %out, ptr addrspace(1) %in, float %a) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid - %b = load float, float 
addrspace(1)* %gep, align 4 + %b = load float, ptr addrspace(1) %gep, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } @@ -127,15 +127,15 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_num_2(float addrspace(1)* % ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f32_scalar_den_1(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind { +define amdgpu_kernel void @test_div_scale_f32_scalar_den_1(ptr addrspace(1) %out, ptr addrspace(1) %in, float %b) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid - %a = load float, float addrspace(1)* %gep, align 4 + %a = load float, ptr addrspace(1) %gep, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } @@ -145,15 +145,15 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_den_1(float addrspace(1)* % ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f32_scalar_den_2(float addrspace(1)* %out, float addrspace(1)* %in, float %b) nounwind { +define amdgpu_kernel void @test_div_scale_f32_scalar_den_2(ptr addrspace(1) %out, ptr addrspace(1) %in, float %b) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr float, float addrspace(1)* %in, i32 %tid + %gep = getelementptr float, ptr addrspace(1) %in, i32 %tid - %a = load float, float addrspace(1)* %gep, align 4 + %a = load float, ptr addrspace(1) %gep, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } @@ -163,15 +163,15 @@ define amdgpu_kernel void @test_div_scale_f32_scalar_den_2(float addrspace(1)* % ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]] ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind { +define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(ptr addrspace(1) %out, ptr addrspace(1) %in, double %a) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid - %b = load double, double addrspace(1)* %gep, align 8 + %b = load double, ptr addrspace(1) %gep, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store 
double %result0, ptr addrspace(1) %out, align 8 ret void } @@ -181,15 +181,15 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(double addrspace(1)* ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]] ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(double addrspace(1)* %out, double addrspace(1)* %in, double %a) nounwind { +define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(ptr addrspace(1) %out, ptr addrspace(1) %in, double %a) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid - %b = load double, double addrspace(1)* %gep, align 8 + %b = load double, ptr addrspace(1) %gep, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } @@ -199,15 +199,15 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(double addrspace(1)* ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[A]] ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind { +define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(ptr addrspace(1) %out, ptr addrspace(1) %in, double %b) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid - %a = load double, double addrspace(1)* %gep, align 8 + %a = load double, ptr addrspace(1) %gep, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } @@ -217,15 +217,15 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(double addrspace(1)* ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[A]] ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(double addrspace(1)* %out, double addrspace(1)* %in, double %b) nounwind { +define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(ptr addrspace(1) %out, ptr addrspace(1) %in, double %b) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep = getelementptr double, double addrspace(1)* %in, i32 %tid + %gep = getelementptr double, ptr addrspace(1) %in, i32 %tid - %a = load double, double addrspace(1)* %gep, align 8 + %a = load double, ptr addrspace(1) %gep, align 8 %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } @@ -236,10 +236,10 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(double addrspace(1)* ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], 
[[B]], [[B]], [[VA]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b) nounwind { +define amdgpu_kernel void @test_div_scale_f32_all_scalar_1(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b) nounwind { %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } @@ -250,10 +250,10 @@ define amdgpu_kernel void @test_div_scale_f32_all_scalar_1(float addrspace(1)* % ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[VB]], [[A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b) nounwind { +define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b) nounwind { %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } @@ -265,10 +265,10 @@ define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(float addrspace(1)* % ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], v[[[VA_LO]]:[[VA_HI]]] ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) nounwind { +define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], double %b) nounwind { %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } @@ -280,10 +280,10 @@ define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(double addrspace(1)* ; SI: v_div_scale_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], v[[[VB_LO]]:[[VB_HI]]], [[A]] ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) nounwind { +define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(ptr addrspace(1) %out, [8 x i32], double %a, [8 x i32], double %b) nounwind { %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } @@ -292,14 +292,14 @@ define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(double addrspace(1)* ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[A]], [[A]], 1.0 ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f32_inline_imm_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { +define amdgpu_kernel void @test_div_scale_f32_inline_imm_num(ptr 
addrspace(1) %out, ptr addrspace(1) %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %a = load float, float addrspace(1)* %gep.0, align 4 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %a = load float, ptr addrspace(1) %gep.0, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } @@ -308,14 +308,14 @@ define amdgpu_kernel void @test_div_scale_f32_inline_imm_num(float addrspace(1)* ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], 2.0, 2.0, [[A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f32_inline_imm_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { +define amdgpu_kernel void @test_div_scale_f32_inline_imm_den(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %a = load float, float addrspace(1)* %gep.0, align 4 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %a = load float, ptr addrspace(1) %gep.0, align 4 %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } @@ -325,19 +325,19 @@ define amdgpu_kernel void @test_div_scale_f32_inline_imm_den(float addrspace(1)* ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], -[[A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f32_fneg_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { +define amdgpu_kernel void @test_div_scale_f32_fneg_num(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, ptr addrspace(1) %gep.0, align 4 + %b = load volatile float, ptr addrspace(1) %gep.1, align 4 %a.fneg = fneg float %a %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fneg, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } @@ -348,19 +348,19 @@ define amdgpu_kernel void @test_div_scale_f32_fneg_num(float addrspace(1)* %out, ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[B]], [[B]], [[ABS_A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { +define amdgpu_kernel void @test_div_scale_f32_fabs_num(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { %tid = call i32 
@llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, ptr addrspace(1) %gep.0, align 4 + %b = load volatile float, ptr addrspace(1) %gep.1, align 4 %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } @@ -370,19 +370,19 @@ define amdgpu_kernel void @test_div_scale_f32_fabs_num(float addrspace(1)* %out, ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], -[[B]], -[[B]], [[A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f32_fneg_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { +define amdgpu_kernel void @test_div_scale_f32_fneg_den(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, ptr addrspace(1) %gep.0, align 4 + %b = load volatile float, ptr addrspace(1) %gep.1, align 4 %b.fneg = fneg float %b %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fneg, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } @@ -393,49 +393,49 @@ define amdgpu_kernel void @test_div_scale_f32_fneg_den(float addrspace(1)* %out, ; SI: v_div_scale_f32 [[RESULT0:v[0-9]+]], [[RESULT1:s\[[0-9]+:[0-9]+\]]], [[ABS_B]], [[ABS_B]], [[A]] ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm -define amdgpu_kernel void @test_div_scale_f32_fabs_den(float addrspace(1)* %out, float addrspace(1)* %in) nounwind { +define amdgpu_kernel void @test_div_scale_f32_fabs_den(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone - %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid - %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1 + %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid + %gep.1 = getelementptr float, ptr addrspace(1) %gep.0, i32 1 - %a = load volatile float, float addrspace(1)* %gep.0, align 4 - %b = load volatile float, float addrspace(1)* %gep.1, align 4 + %a = load volatile float, ptr addrspace(1) %gep.0, align 4 + %b = load volatile float, ptr addrspace(1) %gep.1, align 4 %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 - store float 
%result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } ; SI-LABEL: {{^}}test_div_scale_f32_val_undef_val: ; SI: s_mov_b32 [[K:s[0-9]+]], 0x41000000 ; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[K]], v{{[0-9]+}}, [[K]] -define amdgpu_kernel void @test_div_scale_f32_val_undef_val(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_div_scale_f32_val_undef_val(ptr addrspace(1) %out) #0 { %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } ; SI-LABEL: {{^}}test_div_scale_f32_undef_val_val: ; SI: s_mov_b32 [[K:s[0-9]+]], 0x41000000 ; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[K]], v{{[0-9]+}} -define amdgpu_kernel void @test_div_scale_f32_undef_val_val(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_div_scale_f32_undef_val_val(ptr addrspace(1) %out) #0 { %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } ; SI-LABEL: {{^}}test_div_scale_f32_undef_undef_val: ; SI-NOT: v0 ; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s0, s0, v0 -define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(ptr addrspace(1) %out) #0 { %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false) %result0 = extractvalue { float, i1 } %result, 0 - store float %result0, float addrspace(1)* %out, align 4 + store float %result0, ptr addrspace(1) %out, align 4 ret void } @@ -443,10 +443,10 @@ define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1) ; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}} ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x40200000 ; SI: v_div_scale_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s[[[K_LO]]:[[K_HI]]], v[0:1], s[[[K_LO]]:[[K_HI]]] -define amdgpu_kernel void @test_div_scale_f64_val_undef_val(double addrspace(1)* %out) #0 { +define amdgpu_kernel void @test_div_scale_f64_val_undef_val(ptr addrspace(1) %out) #0 { %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double undef, i1 false) %result0 = extractvalue { double, i1 } %result, 0 - store double %result0, double addrspace(1)* %out, align 8 + store double %result0, ptr addrspace(1) %out, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.add.gs.reg.rtn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.add.gs.reg.rtn.ll index 76751028868eb5..9c0bacd274d084 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.add.gs.reg.rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.add.gs.reg.rtn.ll @@ -20,7 +20,7 @@ define amdgpu_gs void @test_add_32(i32 %arg) { ret void } -define amdgpu_gs void @test_add_32_use(i32 %arg, i32 addrspace(1)* %out) { +define amdgpu_gs void @test_add_32_use(i32 %arg, ptr addrspace(1) %out) { ; CHECK-LABEL: test_add_32_use: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -34,7 +34,7 @@ define amdgpu_gs void @test_add_32_use(i32 %arg, i32 addrspace(1)* %out) { ; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; CHECK-NEXT: s_endpgm %res = call i32 
@llvm.amdgcn.ds.add.gs.reg.rtn.i32(i32 %arg, i32 16) - store i32 %res, i32 addrspace(1)* %out, align 4 + store i32 %res, ptr addrspace(1) %out, align 4 ret void } @@ -53,7 +53,7 @@ define amdgpu_gs void @test_add_64(i32 %arg) { ret void } -define amdgpu_gs void @test_add_64_use(i32 %arg, i64 addrspace(1)* %out) { +define amdgpu_gs void @test_add_64_use(i32 %arg, ptr addrspace(1) %out) { ; CHECK-LABEL: test_add_64_use: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -67,6 +67,6 @@ define amdgpu_gs void @test_add_64_use(i32 %arg, i64 addrspace(1)* %out) { ; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; CHECK-NEXT: s_endpgm %res = call i64 @llvm.amdgcn.ds.add.gs.reg.rtn.i64(i32 %arg, i32 32) - store i64 %res, i64 addrspace(1)* %out, align 4 + store i64 %res, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll index 3005437edd73ec..40c014d360edcc 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.append.ll @@ -13,9 +13,9 @@ ; GCN: ds_append [[RESULT:v[0-9]+]]{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] -define amdgpu_kernel void @ds_append_lds(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { - %val = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %lds, i1 false) - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @ds_append_lds(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 { + %val = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %lds, i1 false) + store i32 %val, ptr addrspace(1) %out ret void } @@ -25,10 +25,10 @@ define amdgpu_kernel void @ds_append_lds(i32 addrspace(3)* %lds, i32 addrspace(1 ; GCN: ds_append [[RESULT:v[0-9]+]] offset:65532{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] -define amdgpu_kernel void @ds_append_lds_max_offset(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { - %gep = getelementptr inbounds i32, i32 addrspace(3)* %lds, i32 16383 - %val = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %gep, i1 false) - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @ds_append_lds_max_offset(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 { + %gep = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 16383 + %val = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %gep, i1 false) + store i32 %val, ptr addrspace(1) %out ret void } @@ -44,11 +44,11 @@ define amdgpu_kernel void @ds_append_lds_max_offset(i32 addrspace(3)* %lds, i32 ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] -define amdgpu_kernel void @ds_append_no_fold_offset_si(i32 addrspace(3)* addrspace(4)* %lds.ptr, i32 addrspace(1)* %out) #0 { - %lds = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* %lds.ptr, align 4 - %gep = getelementptr inbounds i32, i32 addrspace(3)* %lds, i32 4 - %val = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %gep, i1 false) - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @ds_append_no_fold_offset_si(ptr addrspace(4) %lds.ptr, ptr addrspace(1) %out) #0 { + %lds = load ptr addrspace(3), ptr addrspace(4) %lds.ptr, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 4 + %val = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %gep, i1 false) + store i32 %val, ptr addrspace(1) %out ret void } @@ -65,10 +65,10 @@ define amdgpu_kernel void @ds_append_no_fold_offset_si(i32 addrspace(3)* addrspa ; GCN: ds_append [[RESULT:v[0-9]+]]{{$}} ; 
GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] -define amdgpu_kernel void @ds_append_lds_over_max_offset(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { - %gep = getelementptr inbounds i32, i32 addrspace(3)* %lds, i32 16384 - %val = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %gep, i1 false) - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @ds_append_lds_over_max_offset(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 { + %gep = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 16384 + %val = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %gep, i1 false) + store i32 %val, ptr addrspace(1) %out ret void } @@ -81,9 +81,9 @@ define amdgpu_kernel void @ds_append_lds_over_max_offset(i32 addrspace(3)* %lds, ; GCN: ds_append [[RESULT:v[0-9]+]]{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] -define void @ds_append_lds_vgpr_addr(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { - %val = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %lds, i1 false) - store i32 %val, i32 addrspace(1)* %out +define void @ds_append_lds_vgpr_addr(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 { + %val = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %lds, i1 false) + store i32 %val, ptr addrspace(1) %out ret void } @@ -93,9 +93,9 @@ define void @ds_append_lds_vgpr_addr(i32 addrspace(3)* %lds, i32 addrspace(1)* % ; GCN: ds_append [[RESULT:v[0-9]+]] gds{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] -define amdgpu_kernel void @ds_append_gds(i32 addrspace(2)* %gds, i32 addrspace(1)* %out) #0 { - %val = call i32 @llvm.amdgcn.ds.append.p2i32(i32 addrspace(2)* %gds, i1 false) - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @ds_append_gds(ptr addrspace(2) %gds, ptr addrspace(1) %out) #0 { + %val = call i32 @llvm.amdgcn.ds.append.p2(ptr addrspace(2) %gds, i1 false) + store i32 %val, ptr addrspace(1) %out ret void } @@ -105,19 +105,19 @@ define amdgpu_kernel void @ds_append_gds(i32 addrspace(2)* %gds, i32 addrspace(1 ; GCN: ds_append [[RESULT:v[0-9]+]] offset:65532 gds{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] -define amdgpu_kernel void @ds_append_gds_max_offset(i32 addrspace(2)* %gds, i32 addrspace(1)* %out) #0 { - %gep = getelementptr inbounds i32, i32 addrspace(2)* %gds, i32 16383 - %val = call i32 @llvm.amdgcn.ds.append.p2i32(i32 addrspace(2)* %gep, i1 false) - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @ds_append_gds_max_offset(ptr addrspace(2) %gds, ptr addrspace(1) %out) #0 { + %gep = getelementptr inbounds i32, ptr addrspace(2) %gds, i32 16383 + %val = call i32 @llvm.amdgcn.ds.append.p2(ptr addrspace(2) %gep, i1 false) + store i32 %val, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}ds_append_gds_over_max_offset: ; GCN-NOT: buffer_wbinvl1 -define amdgpu_kernel void @ds_append_gds_over_max_offset(i32 addrspace(2)* %gds, i32 addrspace(1)* %out) #0 { - %gep = getelementptr inbounds i32, i32 addrspace(2)* %gds, i32 16384 - %val = call i32 @llvm.amdgcn.ds.append.p2i32(i32 addrspace(2)* %gep, i1 false) - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @ds_append_gds_over_max_offset(ptr addrspace(2) %gds, ptr addrspace(1) %out) #0 { + %gep = getelementptr inbounds i32, ptr addrspace(2) %gds, i32 16384 + %val = call i32 @llvm.amdgcn.ds.append.p2(ptr addrspace(2) %gep, i1 false) + store i32 %val, ptr addrspace(1) %out ret void } @@ -130,10 +130,10 @@ define amdgpu_kernel void @ds_append_gds_over_max_offset(i32 addrspace(2)* 
%gds, ; GFX9-NOT: m0 ; GCN: _store_dword ; GCN: ds_read_b32 -define amdgpu_kernel void @ds_append_lds_m0_restore(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { - %val0 = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %lds, i1 false) - store i32 %val0, i32 addrspace(1)* %out - %val1 = load volatile i32, i32 addrspace(3)* %lds +define amdgpu_kernel void @ds_append_lds_m0_restore(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 { + %val0 = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %lds, i1 false) + store i32 %val0, ptr addrspace(1) %out + %val1 = load volatile i32, ptr addrspace(3) %lds ret void } @@ -142,14 +142,14 @@ define amdgpu_kernel void @ds_append_lds_m0_restore(i32 addrspace(3)* %lds, i32 ; GCN: s_load_dword [[PTR:s[0-9]+]] ; GCN: s_mov_b32 m0, [[PTR]] ; GCN: ds_append [[RESULT:v[0-9]+]] offset:65532{{$}} -define amdgpu_kernel void @ds_append_lds_no_use(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { - %gep = getelementptr inbounds i32, i32 addrspace(3)* %lds, i32 16383 - %val = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %gep, i1 false) +define amdgpu_kernel void @ds_append_lds_no_use(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 { + %gep = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 16383 + %val = call i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) %gep, i1 false) ret void } -declare i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* nocapture, i1 immarg) #1 -declare i32 @llvm.amdgcn.ds.append.p2i32(i32 addrspace(2)* nocapture, i1 immarg) #1 +declare i32 @llvm.amdgcn.ds.append.p3(ptr addrspace(3) nocapture, i1 immarg) #1 +declare i32 @llvm.amdgcn.ds.append.p2(ptr addrspace(2) nocapture, i1 immarg) #1 attributes #0 = { nounwind } attributes #1 = { argmemonly convergent nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll index f6f3a36d13586f..90e18a881340b3 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bpermute.ll @@ -4,49 +4,49 @@ declare i32 @llvm.amdgcn.ds.bpermute(i32, i32) #0 ; CHECK-LABEL: {{^}}ds_bpermute: ; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @ds_bpermute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind { +define amdgpu_kernel void @ds_bpermute(ptr addrspace(1) %out, i32 %index, i32 %src) nounwind { %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) #0 - store i32 %bpermute, i32 addrspace(1)* %out, align 4 + store i32 %bpermute, ptr addrspace(1) %out, align 4 ret void } ; CHECK-LABEL: {{^}}ds_bpermute_imm_offset: ; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4 -define amdgpu_kernel void @ds_bpermute_imm_offset(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind { +define amdgpu_kernel void @ds_bpermute_imm_offset(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind { %index = add i32 %base_index, 4 %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %index, i32 %src) #0 - store i32 %bpermute, i32 addrspace(1)* %out, align 4 + store i32 %bpermute, ptr addrspace(1) %out, align 4 ret void } ; CHECK-LABEL: {{^}}ds_bpermute_imm_index: ; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:64 -define amdgpu_kernel void @ds_bpermute_imm_index(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind { +define amdgpu_kernel void @ds_bpermute_imm_index(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind { %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 
64, i32 %src) #0 - store i32 %bpermute, i32 addrspace(1)* %out, align 4 + store i32 %bpermute, ptr addrspace(1) %out, align 4 ret void } ; CHECK-LABEL: {{^}}ds_bpermute_add_shl: ; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4 ; CHECK: s_waitcnt lgkmcnt -define void @ds_bpermute_add_shl(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind { +define void @ds_bpermute_add_shl(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind { %index = add i32 %base_index, 1 %byte_index = shl i32 %index, 2 %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %byte_index, i32 %src) #0 - store i32 %bpermute, i32 addrspace(1)* %out, align 4 + store i32 %bpermute, ptr addrspace(1) %out, align 4 ret void } ; CHECK-LABEL: {{^}}ds_bpermute_or_shl: ; CHECK: ds_bpermute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4 ; CHECK: s_waitcnt lgkmcnt -define void @ds_bpermute_or_shl(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind { +define void @ds_bpermute_or_shl(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind { %masked = and i32 %base_index, 62 %index = or i32 %masked, 1 %byte_index = shl i32 %index, 2 %bpermute = call i32 @llvm.amdgcn.ds.bpermute(i32 %byte_index, i32 %src) #0 - store i32 %bpermute, i32 addrspace(1)* %out, align 4 + store i32 %bpermute, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bvh.stack.rtn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bvh.stack.rtn.ll index 085eb9dee01695..20c4b044b5b553 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bvh.stack.rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.bvh.stack.rtn.ll @@ -4,7 +4,7 @@ declare { i32, i32 } @llvm.amdgcn.ds.bvh.stack.rtn(i32, i32, <4 x i32>, i32 immarg) -define amdgpu_gs void @test_ds_bvh_stack(i32 %addr, i32 %data0, <4 x i32> %data1, i32 addrspace(1)* %out) { +define amdgpu_gs void @test_ds_bvh_stack(i32 %addr, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) { ; CHECK-LABEL: test_ds_bvh_stack: ; CHECK: ; %bb.0: ; CHECK-NEXT: ds_bvh_stack_rtn_b32 v1, v0, v1, v[2:5] @@ -17,11 +17,11 @@ define amdgpu_gs void @test_ds_bvh_stack(i32 %addr, i32 %data0, <4 x i32> %data1 %vdst = extractvalue { i32, i32 } %pair, 0 %newaddr = extractvalue { i32, i32 } %pair, 1 %res = add i32 %vdst, %newaddr - store i32 %res, i32 addrspace(1)* %out, align 4 + store i32 %res, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_gs void @test_ds_bvh_stack_1(i32 %addr, i32 %data0, <4 x i32> %data1, i32 addrspace(1)* %out) { +define amdgpu_gs void @test_ds_bvh_stack_1(i32 %addr, i32 %data0, <4 x i32> %data1, ptr addrspace(1) %out) { ; CHECK-LABEL: test_ds_bvh_stack_1: ; CHECK: ; %bb.0: ; CHECK-NEXT: ds_bvh_stack_rtn_b32 v1, v0, v1, v[2:5] offset:1 @@ -34,6 +34,6 @@ define amdgpu_gs void @test_ds_bvh_stack_1(i32 %addr, i32 %data0, <4 x i32> %dat %vdst = extractvalue { i32, i32 } %pair, 0 %newaddr = extractvalue { i32, i32 } %pair, 1 %res = add i32 %vdst, %newaddr - store i32 %res, i32 addrspace(1)* %out, align 4 + store i32 %res, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll index 59c6549ad6ad9f..a0d2f0e54cd336 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.consume.ll @@ -13,9 +13,9 @@ ; GCN: ds_consume [[RESULT:v[0-9]+]]{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] -define amdgpu_kernel void @ds_consume_lds(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) 
#0 { - %val = call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %lds, i1 false) - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @ds_consume_lds(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 { + %val = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %lds, i1 false) + store i32 %val, ptr addrspace(1) %out ret void } @@ -25,10 +25,10 @@ define amdgpu_kernel void @ds_consume_lds(i32 addrspace(3)* %lds, i32 addrspace( ; GCN: ds_consume [[RESULT:v[0-9]+]] offset:65532{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] -define amdgpu_kernel void @ds_consume_lds_max_offset(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { - %gep = getelementptr inbounds i32, i32 addrspace(3)* %lds, i32 16383 - %val = call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %gep, i1 false) - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @ds_consume_lds_max_offset(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 { + %gep = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 16383 + %val = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %gep, i1 false) + store i32 %val, ptr addrspace(1) %out ret void } @@ -44,11 +44,11 @@ define amdgpu_kernel void @ds_consume_lds_max_offset(i32 addrspace(3)* %lds, i32 ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] -define amdgpu_kernel void @ds_consume_no_fold_offset_si(i32 addrspace(3)* addrspace(4)* %lds.ptr, i32 addrspace(1)* %out) #0 { - %lds = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* %lds.ptr, align 4 - %gep = getelementptr inbounds i32, i32 addrspace(3)* %lds, i32 4 - %val = call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %gep, i1 false) - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @ds_consume_no_fold_offset_si(ptr addrspace(4) %lds.ptr, ptr addrspace(1) %out) #0 { + %lds = load ptr addrspace(3), ptr addrspace(4) %lds.ptr, align 4 + %gep = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 4 + %val = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %gep, i1 false) + store i32 %val, ptr addrspace(1) %out ret void } @@ -63,10 +63,10 @@ define amdgpu_kernel void @ds_consume_no_fold_offset_si(i32 addrspace(3)* addrsp ; GCN: ds_consume [[RESULT:v[0-9]+]]{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] -define amdgpu_kernel void @ds_consume_lds_over_max_offset(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { - %gep = getelementptr inbounds i32, i32 addrspace(3)* %lds, i32 16384 - %val = call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %gep, i1 false) - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @ds_consume_lds_over_max_offset(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 { + %gep = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 16384 + %val = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %gep, i1 false) + store i32 %val, ptr addrspace(1) %out ret void } @@ -77,9 +77,9 @@ define amdgpu_kernel void @ds_consume_lds_over_max_offset(i32 addrspace(3)* %lds ; GCN: ds_consume [[RESULT:v[0-9]+]]{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] -define void @ds_consume_lds_vgpr_addr(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { - %val = call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %lds, i1 false) - store i32 %val, i32 addrspace(1)* %out +define void @ds_consume_lds_vgpr_addr(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 { + %val = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %lds, i1 false) + store i32 %val, ptr addrspace(1) 
%out ret void } @@ -89,9 +89,9 @@ define void @ds_consume_lds_vgpr_addr(i32 addrspace(3)* %lds, i32 addrspace(1)* ; GCN: ds_consume [[RESULT:v[0-9]+]] gds{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] -define amdgpu_kernel void @ds_consume_gds(i32 addrspace(2)* %gds, i32 addrspace(1)* %out) #0 { - %val = call i32 @llvm.amdgcn.ds.consume.p2i32(i32 addrspace(2)* %gds, i1 false) - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @ds_consume_gds(ptr addrspace(2) %gds, ptr addrspace(1) %out) #0 { + %val = call i32 @llvm.amdgcn.ds.consume.p2(ptr addrspace(2) %gds, i1 false) + store i32 %val, ptr addrspace(1) %out ret void } @@ -101,19 +101,19 @@ define amdgpu_kernel void @ds_consume_gds(i32 addrspace(2)* %gds, i32 addrspace( ; GCN: ds_consume [[RESULT:v[0-9]+]] offset:65532 gds{{$}} ; GCN-NOT: buffer_wbinvl1 ; GCN: {{.*}}store{{.*}} [[RESULT]] -define amdgpu_kernel void @ds_consume_gds_max_offset(i32 addrspace(2)* %gds, i32 addrspace(1)* %out) #0 { - %gep = getelementptr inbounds i32, i32 addrspace(2)* %gds, i32 16383 - %val = call i32 @llvm.amdgcn.ds.consume.p2i32(i32 addrspace(2)* %gep, i1 false) - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @ds_consume_gds_max_offset(ptr addrspace(2) %gds, ptr addrspace(1) %out) #0 { + %gep = getelementptr inbounds i32, ptr addrspace(2) %gds, i32 16383 + %val = call i32 @llvm.amdgcn.ds.consume.p2(ptr addrspace(2) %gep, i1 false) + store i32 %val, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}ds_consume_gds_over_max_offset: ; GCN-NOT: buffer_wbinvl1 -define amdgpu_kernel void @ds_consume_gds_over_max_offset(i32 addrspace(2)* %gds, i32 addrspace(1)* %out) #0 { - %gep = getelementptr inbounds i32, i32 addrspace(2)* %gds, i32 16384 - %val = call i32 @llvm.amdgcn.ds.consume.p2i32(i32 addrspace(2)* %gep, i1 false) - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @ds_consume_gds_over_max_offset(ptr addrspace(2) %gds, ptr addrspace(1) %out) #0 { + %gep = getelementptr inbounds i32, ptr addrspace(2) %gds, i32 16384 + %val = call i32 @llvm.amdgcn.ds.consume.p2(ptr addrspace(2) %gep, i1 false) + store i32 %val, ptr addrspace(1) %out ret void } @@ -126,10 +126,10 @@ define amdgpu_kernel void @ds_consume_gds_over_max_offset(i32 addrspace(2)* %gds ; GFX9-NOT: m0 ; GCN: _store_dword ; GCN: ds_read_b32 -define amdgpu_kernel void @ds_consume_lds_m0_restore(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { - %val0 = call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %lds, i1 false) - store i32 %val0, i32 addrspace(1)* %out - %val1 = load volatile i32, i32 addrspace(3)* %lds +define amdgpu_kernel void @ds_consume_lds_m0_restore(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 { + %val0 = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %lds, i1 false) + store i32 %val0, ptr addrspace(1) %out + %val1 = load volatile i32, ptr addrspace(3) %lds ret void } @@ -138,14 +138,14 @@ define amdgpu_kernel void @ds_consume_lds_m0_restore(i32 addrspace(3)* %lds, i32 ; GCN: s_load_dword [[PTR:s[0-9]+]] ; GCN: s_mov_b32 m0, [[PTR]] ; GCN: ds_consume [[RESULT:v[0-9]+]] offset:65532{{$}} -define amdgpu_kernel void @ds_consume_lds_no_use(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) #0 { - %gep = getelementptr inbounds i32, i32 addrspace(3)* %lds, i32 16383 - %val = call i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* %gep, i1 false) +define amdgpu_kernel void @ds_consume_lds_no_use(ptr addrspace(3) %lds, ptr addrspace(1) %out) #0 { + %gep = getelementptr inbounds i32, ptr addrspace(3) %lds, 
i32 16383 + %val = call i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) %gep, i1 false) ret void } -declare i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* nocapture, i1 immarg) #1 -declare i32 @llvm.amdgcn.ds.consume.p2i32(i32 addrspace(2)* nocapture, i1 immarg) #1 +declare i32 @llvm.amdgcn.ds.consume.p3(ptr addrspace(3) nocapture, i1 immarg) #1 +declare i32 @llvm.amdgcn.ds.consume.p2(ptr addrspace(2) nocapture, i1 immarg) #1 attributes #0 = { nounwind } attributes #1 = { argmemonly convergent nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll index d7a4ba9dc5eb29..557683bac0c371 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll @@ -144,9 +144,9 @@ define amdgpu_kernel void @gws_barrier_vgpr_offset_add(i32 %val) #0 { ; LOOP: s_mov_b32 m0, -1 ; LOOP: ds_write_b32 define amdgpu_kernel void @gws_barrier_save_m0_barrier_constant_offset(i32 %val) #0 { - store i32 1, i32 addrspace(3)* @lds + store i32 1, ptr addrspace(3) @lds call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 10) - store i32 2, i32 addrspace(3)* @lds + store i32 2, ptr addrspace(3) @lds ret void } @@ -165,8 +165,8 @@ define void @gws_barrier_lgkmcnt(i32 %val) { ; GCN-LABEL: {{^}}gws_barrier_wait_before: ; NOLOOP: s_waitcnt ; NOLOOP-NOT: s_waitcnt{{$}} -define amdgpu_kernel void @gws_barrier_wait_before(i32 %val, i32 addrspace(1)* %ptr) #0 { - store i32 0, i32 addrspace(1)* %ptr +define amdgpu_kernel void @gws_barrier_wait_before(i32 %val, ptr addrspace(1) %ptr) #0 { + store i32 0, ptr addrspace(1) %ptr call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7) ret void } @@ -176,9 +176,9 @@ define amdgpu_kernel void @gws_barrier_wait_before(i32 %val, i32 addrspace(1)* % ; NOLOOP: ds_gws_barrier v{{[0-9]+}} offset:7 gds ; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; NOLOOP: load_{{dword|b32}} -define amdgpu_kernel void @gws_barrier_wait_after(i32 %val, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @gws_barrier_wait_after(i32 %val, ptr addrspace(1) %ptr) #0 { call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7) - %load = load volatile i32, i32 addrspace(1)* %ptr + %load = load volatile i32, ptr addrspace(1) %ptr ret void } @@ -189,8 +189,8 @@ define amdgpu_kernel void @gws_barrier_wait_after(i32 %val, i32 addrspace(1)* %p ; NOLOOP: s_waitcnt vmcnt(0) lgkmcnt(0) ; NOLOOP: ds_gws_barrier v{{[0-9]+}} offset:7 gds ; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -define amdgpu_kernel void @gws_barrier_fence_before(i32 %val, i32 addrspace(1)* %ptr) #0 { - store i32 0, i32 addrspace(1)* %ptr +define amdgpu_kernel void @gws_barrier_fence_before(i32 %val, ptr addrspace(1) %ptr) #0 { + store i32 0, ptr addrspace(1) %ptr fence release call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7) ret void @@ -204,10 +204,10 @@ define amdgpu_kernel void @gws_barrier_fence_before(i32 %val, i32 addrspace(1)* ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; NOLOOP-NEXT: load_{{dword|b32}} -define amdgpu_kernel void @gws_barrier_fence_after(i32 %val, i32 addrspace(1)* %ptr) #0 { +define amdgpu_kernel void @gws_barrier_fence_after(i32 %val, ptr addrspace(1) %ptr) #0 { call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7) fence release - %load = load volatile i32, i32 addrspace(1)* %ptr + %load = load volatile i32, ptr addrspace(1) %ptr ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll index 
f87a3eaad63a96..f658ab39f771fb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
@@ -137,9 +137,9 @@ define amdgpu_kernel void @gws_init_vgpr_offset_add(i32 %val) #0 {
 ; LOOP: s_mov_b32 m0, -1
 ; LOOP: ds_write_b32
 define amdgpu_kernel void @gws_init_save_m0_init_constant_offset(i32 %val) #0 {
-  store volatile i32 1, i32 addrspace(3)* @lds
+  store volatile i32 1, ptr addrspace(3) @lds
   call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 10)
-  store i32 2, i32 addrspace(3)* @lds
+  store i32 2, ptr addrspace(3) @lds
   ret void
 }
@@ -159,8 +159,8 @@ define void @gws_init_lgkmcnt(i32 %val) {
 ; NOLOOP-NOT: s_waitcnt
 ; NOLOOP: ds_gws_init
 ; NOLOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-define amdgpu_kernel void @gws_init_wait_before(i32 %val, i32 addrspace(1)* %ptr) #0 {
-  store i32 0, i32 addrspace(1)* %ptr
+define amdgpu_kernel void @gws_init_wait_before(i32 %val, ptr addrspace(1) %ptr) #0 {
+  store i32 0, ptr addrspace(1) %ptr
   call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 7)
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll
index 6a9d10fbfb3dab..fd501ef46d9845 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx10.ll
@@ -7,9 +7,9 @@
 ; GCN-DAG: v_{{(dual_)?}}mov_b32{{(_e32)?}} v[[INCR:[0-9]+]], 31
 ; GCN-DAG: s_mov_b32 m0,
 ; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
-define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
-  store i32 %val, i32 addrspace(1)* %out
+define amdgpu_kernel void @ds_ordered_add(ptr addrspace(2) inreg %gds, ptr addrspace(1) %out) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }
@@ -17,10 +17,10 @@ define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addr
 ; GCN-DAG: v_{{(dual_)?}}mov_b32{{(_e32)?}} v[[INCR:[0-9]+]], 31
 ; GCN-DAG: s_mov_b32 m0,
 ; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:49924 gds
-define amdgpu_kernel void @ds_ordered_add_4dw(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 67108865, i1 true, i1 true)
-  store i32 %val, i32 addrspace(1)* %out
+define amdgpu_kernel void @ds_ordered_add_4dw(ptr addrspace(2) inreg %gds, ptr addrspace(1) %out) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 67108865, i1 true, i1 true)
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }
-declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
+declare i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) nocapture, i32, i32, i32, i1, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
index 6d5cc7a0b14fce..a9c2c279038988 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
@@ -5,9 +5,9 @@
 ; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN-DAG: s_mov_b32 m0,
 ; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
-define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
-  store i32 %val, i32 addrspace(1)* %out
+define amdgpu_kernel void @ds_ordered_add(ptr addrspace(2) inreg %gds, ptr addrspace(1) %out) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }
@@ -16,8 +16,8 @@ define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addr
 ; GCN: s_mov_b32 m0, s0
 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
-define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
+define amdgpu_cs float @ds_ordered_add_cs(ptr addrspace(2) inreg %gds) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
   %r = bitcast i32 %val to float
   ret float %r
 }
@@ -27,8 +27,8 @@ define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
 ; GCN: s_mov_b32 m0, s0
 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
-define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
+define amdgpu_ps float @ds_ordered_add_ps(ptr addrspace(2) inreg %gds) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
   %r = bitcast i32 %val to float
   ret float %r
 }
@@ -38,8 +38,8 @@ define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
 ; GCN: s_mov_b32 m0, s0
 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
-define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
+define amdgpu_vs float @ds_ordered_add_vs(ptr addrspace(2) inreg %gds) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
   %r = bitcast i32 %val to float
   ret float %r
 }
@@ -49,10 +49,10 @@ define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
 ; GCN: s_mov_b32 m0, s0
 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
-define amdgpu_gs float @ds_ordered_add_gs(i32 addrspace(2)* inreg %gds) {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
+define amdgpu_gs float @ds_ordered_add_gs(ptr addrspace(2) inreg %gds) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
   %r = bitcast i32 %val to float
   ret float %r
 }
-declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
+declare i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) nocapture, i32, i32, i32, i1, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
index 76bd2270a47bfa..aa83d8e67ad91a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
@@ -11,9 +11,9 @@
 ; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN-DAG: s_mov_b32 m0,
 ; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
-define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
-  store i32 %val, i32 addrspace(1)* %out
+define amdgpu_kernel void @ds_ordered_add(ptr addrspace(2) inreg %gds, ptr addrspace(1) %out) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }
@@ -23,9 +23,9 @@ define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addr
 ; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN-DAG: s_mov_b32 m0,
 ; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:776 gds
-define amdgpu_kernel void @ds_ordered_add_counter2(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 2, i1 true, i1 true)
-  store i32 %val, i32 addrspace(1)* %out
+define amdgpu_kernel void @ds_ordered_add_counter2(ptr addrspace(2) inreg %gds, ptr addrspace(1) %out) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 2, i1 true, i1 true)
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }
@@ -33,9 +33,9 @@ define amdgpu_kernel void @ds_ordered_add_counter2(i32 addrspace(2)* inreg %gds,
 ; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN-DAG: s_mov_b32 m0,
 ; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:260 gds
-define amdgpu_kernel void @ds_ordered_add_nodone(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 false)
-  store i32 %val, i32 addrspace(1)* %out
+define amdgpu_kernel void @ds_ordered_add_nodone(ptr addrspace(2) inreg %gds, ptr addrspace(1) %out) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 false)
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }
@@ -43,9 +43,9 @@ define amdgpu_kernel void @ds_ordered_add_nodone(i32 addrspace(2)* inreg %gds, i
 ; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN-DAG: s_mov_b32 m0,
 ; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:4 gds
-define amdgpu_kernel void @ds_ordered_add_norelease(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 false, i1 false)
-  store i32 %val, i32 addrspace(1)* %out
+define amdgpu_kernel void @ds_ordered_add_norelease(ptr addrspace(2) inreg %gds, ptr addrspace(1) %out) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 false, i1 false)
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }
@@ -55,8 +55,8 @@ define amdgpu_kernel void @ds_ordered_add_norelease(i32 addrspace(2)* inreg %gds
 ; VIGFX9-NEXT: s_nop 0
 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
-define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+define amdgpu_cs float @ds_ordered_add_cs(ptr addrspace(2) inreg %gds) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
   %r = bitcast i32 %val to float
   ret float %r
 }
@@ -68,7 +68,7 @@ define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
 define float @ds_ordered_add_default_cc() {
-  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %val = call i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
   %r = bitcast i32 %val to float
   ret float %r
 }
@@ -80,7 +80,7 @@ define float @ds_ordered_add_default_cc() {
 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
 define fastcc float @ds_ordered_add_fastcc() {
-  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %val = call i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
   %r = bitcast i32 %val to float
   ret float %r
 }
@@ -92,7 +92,7 @@ define fastcc float @ds_ordered_add_fastcc() {
 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
 define float @ds_ordered_add_func() {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
   %r = bitcast i32 %val to float
   ret float %r
 }
@@ -103,8 +103,8 @@ define float @ds_ordered_add_func() {
 ; VIGFX9-NEXT: s_nop 0
 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:1796 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
-define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+define amdgpu_ps float @ds_ordered_add_ps(ptr addrspace(2) inreg %gds) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
   %r = bitcast i32 %val to float
   ret float %r
 }
@@ -115,8 +115,8 @@ define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
 ; VIGFX9-NEXT: s_nop 0
 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:2820 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
-define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+define amdgpu_vs float @ds_ordered_add_vs(ptr addrspace(2) inreg %gds) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
   %r = bitcast i32 %val to float
   ret float %r
 }
@@ -127,10 +127,10 @@ define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
 ; VIGFX9-NEXT: s_nop 0
 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:3844 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
-define amdgpu_gs float @ds_ordered_add_gs(i32 addrspace(2)* inreg %gds) {
-  %val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+define amdgpu_gs float @ds_ordered_add_gs(ptr addrspace(2) inreg %gds) {
+  %val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
   %r = bitcast i32 %val to float
   ret float %r
 }
-declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
+declare i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) nocapture, i32, i32, i32, i1, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
index 76266919b5ac98..87bc6e4b444231 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
@@ -12,8 +12,8 @@
 ; VIGFX9-NEXT: s_nop 0
 ; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds
 ; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
-define amdgpu_cs float @ds_ordered_swap(i32 addrspace(2)* inreg %gds, i32 %value) {
-  %val = call i32@llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+define amdgpu_cs float @ds_ordered_swap(ptr addrspace(2) inreg %gds, i32 %value) {
+  %val = call i32@llvm.amdgcn.ds.ordered.swap(ptr addrspace(2) %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
   %r = bitcast i32 %val to float
   ret float %r
 }
@@ -31,13 +31,13 @@ define amdgpu_cs float @ds_ordered_swap(i32 addrspace(2)* inreg %gds, i32 %value
 ; GCN-NEXT: s_waitcnt expcnt(0)
 ; GCN-NEXT: s_or_b64 exec, exec, s[[SAVED]]
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
-define amdgpu_cs float @ds_ordered_swap_conditional(i32 addrspace(2)* inreg %gds, i32 %value) {
+define amdgpu_cs float @ds_ordered_swap_conditional(ptr addrspace(2) inreg %gds, i32 %value) {
 entry:
   %c = icmp ne i32 %value, 0
   br i1 %c, label %if-true, label %endif
 if-true:
-  %val = call i32@llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %val = call i32@llvm.amdgcn.ds.ordered.swap(ptr addrspace(2) %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
   br label %endif
 endif:
@@ -46,4 +46,4 @@ endif:
   ret float %r
 }
-declare i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
+declare i32 @llvm.amdgcn.ds.ordered.swap(ptr addrspace(2) nocapture, i32, i32, i32, i1, i32, i1, i1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll
index 63618c3aed7759..6581e251b416a1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll
@@ -4,18 +4,18 @@ declare i32 @llvm.amdgcn.ds.permute(i32, i32) #0
 ; CHECK-LABEL: {{^}}ds_permute:
 ; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @ds_permute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind {
+define amdgpu_kernel void @ds_permute(ptr addrspace(1) %out, i32 %index, i32 %src) nounwind {
   %bpermute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) #0
-  store i32 %bpermute, i32 addrspace(1)* %out, align 4
+  store i32 %bpermute, ptr addrspace(1) %out, align 4
   ret void
 }
 ; CHECK-LABEL: {{^}}ds_permute_imm_offset:
 ; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
-define amdgpu_kernel void @ds_permute_imm_offset(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {
+define amdgpu_kernel void @ds_permute_imm_offset(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind {
   %index = add i32 %base_index, 4
   %bpermute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) #0
-  store i32 %bpermute, i32 addrspace(1)* %out, align 4
+  store i32 %bpermute, ptr addrspace(1) %out, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.sub.gs.reg.rtn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.sub.gs.reg.rtn.ll
index 8d99b08a86d6a0..7f0f2c305b5de6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.sub.gs.reg.rtn.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.sub.gs.reg.rtn.ll
@@ -20,7 +20,7 @@ define amdgpu_gs void @test_sub_32(i32 %arg) {
   ret void
 }
-define amdgpu_gs void @test_sub_32_use(i32 %arg, i32 addrspace(1)* %out) {
+define amdgpu_gs void @test_sub_32_use(i32 %arg, ptr addrspace(1) %out) {
 ; CHECK-LABEL: test_sub_32_use:
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -34,7 +34,7 @@ define amdgpu_gs void @test_sub_32_use(i32 %arg, i32 addrspace(1)* %out) {
 ; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; CHECK-NEXT: s_endpgm
   %res = call i32 @llvm.amdgcn.ds.sub.gs.reg.rtn.i32(i32 %arg, i32 16)
-  store i32 %res, i32 addrspace(1)* %out, align 4
+  store i32 %res, ptr addrspace(1) %out, align 4
   ret void
 }
@@ -53,7 +53,7 @@ define amdgpu_gs void @test_sub_64(i32 %arg) {
   ret void
 }
-define amdgpu_gs void @test_sub_64_use(i32 %arg, i64 addrspace(1)* %out) {
+define amdgpu_gs void @test_sub_64_use(i32 %arg, ptr addrspace(1) %out) {
 ; CHECK-LABEL: test_sub_64_use:
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -67,7 +67,7 @@ define amdgpu_gs void @test_sub_64_use(i32 %arg, i64 addrspace(1)* %out) {
 ; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; CHECK-NEXT: s_endpgm
   %res = call i64 @llvm.amdgcn.ds.sub.gs.reg.rtn.i64(i32 %arg, i32 32)
-  store i64 %res, i64 addrspace(1)* %out, align 4
+  store i64 %res, ptr addrspace(1) %out, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll
index 26e82223d7f0ca..038ba91c0d11bd 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll
@@ -5,9 +5,9 @@ declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #0
 ; CHECK-LABEL: {{^}}ds_swizzle:
 ; CHECK: ds_swizzle_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:swizzle(BITMASK_PERM,"00p11")
-define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) nounwind {
+define amdgpu_kernel void @ds_swizzle(ptr addrspace(1) %out, i32 %src) nounwind {
   %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
-  store i32 %swizzle, i32 addrspace(1)* %out, align 4
+  store i32 %swizzle, ptr addrspace(1) %out, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
index 014e6900e50444..b5172a98357926 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
@@ -629,12 +629,10 @@ define amdgpu_kernel void @test_export_across_store_load(i32 %idx, float %v) #0
   %data0 = alloca <4 x float>, align 8, addrspace(5)
   %data1 = alloca <4 x float>, align 8, addrspace(5)
   %cmp = icmp eq i32 %idx, 1
-  %data = select i1 %cmp, <4 x float> addrspace(5)* %data0, <4 x float> addrspace(5)* %data1
-  %sptr = getelementptr inbounds <4 x float>, <4 x float> addrspace(5)* %data, i32 0, i32 0
-  store float %v, float addrspace(5)* %sptr, align 8
+  %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
+  store float %v, ptr addrspace(5) %data, align 8
   call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
-  %ptr0 = getelementptr inbounds <4 x float>, <4 x float> addrspace(5)* %data0, i32 0, i32 0
-  %load0 = load float, float addrspace(5)* %ptr0, align 8
+  %load0 = load float, ptr addrspace(5) %data0, align 8
   call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
   call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
index cbfbe778b8c361..7fafbffea883aa 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll
@@ -12,7 +12,7 @@ declare float @llvm.fabs.f32(float) #0
 declare i32 @llvm.amdgcn.fcmp.f16(half, half, i32) #0
 declare half @llvm.fabs.f16(half) #0
-define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i32 addrspace(1)* %out, float %src, float %a) {
+define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(ptr addrspace(1) %out, float %src, float %a) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_oeq_with_fabs:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -57,11 +57,11 @@ define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i32 addrspace(1)* %out, floa
 ; GISEL-GFX10-NEXT: s_endpgm
   %temp = call float @llvm.fabs.f32(float %a)
   %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float %temp, i32 1)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(i32 addrspace(1)* %out, float %src, float %a) {
+define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(ptr addrspace(1) %out, float %src, float %a) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -107,11 +107,11 @@ define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(i32 addrspace(
   %temp = call float @llvm.fabs.f32(float %a)
   %src_input = call float @llvm.fabs.f32(float %src)
   %result = call i32 @llvm.amdgcn.fcmp.f32(float %src_input, float %temp, i32 1)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32(i32 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32(ptr addrspace(1) %out, float %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_endpgm
@@ -137,11 +137,11 @@ define amdgpu_kernel void @v_fcmp_f32(i32 addrspace(1)* %out, float %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v0, v0, s[0:1]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 -1)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_oeq(i32 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_oeq(ptr addrspace(1) %out, float %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_oeq:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -193,11 +193,11 @@ define amdgpu_kernel void @v_fcmp_f32_oeq(i32 addrspace(1)* %out, float %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 1)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_one(i32 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_one(ptr addrspace(1) %out, float %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_one:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -249,11 +249,11 @@ define amdgpu_kernel void @v_fcmp_f32_one(i32 addrspace(1)* %out, float %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 6)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_ogt(i32 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_ogt(ptr addrspace(1) %out, float %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_ogt:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -305,11 +305,11 @@ define amdgpu_kernel void @v_fcmp_f32_ogt(i32 addrspace(1)* %out, float %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 2)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_oge(i32 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_oge(ptr addrspace(1) %out, float %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_oge:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -361,11 +361,11 @@ define amdgpu_kernel void @v_fcmp_f32_oge(i32 addrspace(1)* %out, float %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 3)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_olt(i32 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_olt(ptr addrspace(1) %out, float %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_olt:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -417,11 +417,11 @@ define amdgpu_kernel void @v_fcmp_f32_olt(i32 addrspace(1)* %out, float %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 4)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_ole(i32 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_ole(ptr addrspace(1) %out, float %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_ole:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -473,12 +473,12 @@ define amdgpu_kernel void @v_fcmp_f32_ole(i32 addrspace(1)* %out, float %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 5)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_ueq(i32 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_ueq(ptr addrspace(1) %out, float %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_ueq:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -530,11 +530,11 @@ define amdgpu_kernel void @v_fcmp_f32_ueq(i32 addrspace(1)* %out, float %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 9)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_une(i32 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_une(ptr addrspace(1) %out, float %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_une:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -586,11 +586,11 @@ define amdgpu_kernel void @v_fcmp_f32_une(i32 addrspace(1)* %out, float %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 14)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_ugt(i32 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_ugt(ptr addrspace(1) %out, float %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_ugt:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -642,11 +642,11 @@ define amdgpu_kernel void @v_fcmp_f32_ugt(i32 addrspace(1)* %out, float %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 10)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_uge(i32 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_uge(ptr addrspace(1) %out, float %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_uge:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -698,11 +698,11 @@ define amdgpu_kernel void @v_fcmp_f32_uge(i32 addrspace(1)* %out, float %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 11)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_ult(i32 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_ult(ptr addrspace(1) %out, float %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_ult:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -754,11 +754,11 @@ define amdgpu_kernel void @v_fcmp_f32_ult(i32 addrspace(1)* %out, float %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 12)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_ule(i32 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_ule(ptr addrspace(1) %out, float %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f32_ule:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -810,11 +810,11 @@ define amdgpu_kernel void @v_fcmp_f32_ule(i32 addrspace(1)* %out, float %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 13)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_oeq(i32 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_oeq(ptr addrspace(1) %out, double %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f64_oeq:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -866,11 +866,11 @@ define amdgpu_kernel void @v_fcmp_f64_oeq(i32 addrspace(1)* %out, double %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 1)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_one(i32 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_one(ptr addrspace(1) %out, double %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f64_one:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -922,11 +922,11 @@ define amdgpu_kernel void @v_fcmp_f64_one(i32 addrspace(1)* %out, double %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 6)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_ogt(i32 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_ogt(ptr addrspace(1) %out, double %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f64_ogt:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -978,11 +978,11 @@ define amdgpu_kernel void @v_fcmp_f64_ogt(i32 addrspace(1)* %out, double %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 2)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_oge(i32 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_oge(ptr addrspace(1) %out, double %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f64_oge:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1034,11 +1034,11 @@ define amdgpu_kernel void @v_fcmp_f64_oge(i32 addrspace(1)* %out, double %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 3)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_olt(i32 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_olt(ptr addrspace(1) %out, double %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f64_olt:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1090,11 +1090,11 @@ define amdgpu_kernel void @v_fcmp_f64_olt(i32 addrspace(1)* %out, double %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 4)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_ole(i32 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_ole(ptr addrspace(1) %out, double %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f64_ole:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1146,11 +1146,11 @@ define amdgpu_kernel void @v_fcmp_f64_ole(i32 addrspace(1)* %out, double %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 5)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_ueq(i32 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f64_ueq:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1202,11 +1202,11 @@ define amdgpu_kernel void @v_fcmp_f64_ueq(i32 addrspace(1)* %out, double %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 9)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_une(i32 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f64_une:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1258,11 +1258,11 @@ define amdgpu_kernel void @v_fcmp_f64_une(i32 addrspace(1)* %out, double %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 14)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_ugt(i32 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_ugt(ptr addrspace(1) %out, double %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f64_ugt:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1314,11 +1314,11 @@ define amdgpu_kernel void @v_fcmp_f64_ugt(i32 addrspace(1)* %out, double %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 10)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_uge(i32 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_uge(ptr addrspace(1) %out, double %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f64_uge:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1370,11 +1370,11 @@ define amdgpu_kernel void @v_fcmp_f64_uge(i32 addrspace(1)* %out, double %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 11)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_ult(i32 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_ult(ptr addrspace(1) %out, double %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f64_ult:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1426,11 +1426,11 @@ define amdgpu_kernel void @v_fcmp_f64_ult(i32 addrspace(1)* %out, double %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 12)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_ule(i32 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_ule(ptr addrspace(1) %out, double %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f64_ule:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1482,12 +1482,12 @@ define amdgpu_kernel void @v_fcmp_f64_ule(i32 addrspace(1)* %out, double %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 13)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(i32 addrspace(1)* %out, half %src, half %a) {
+define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(ptr addrspace(1) %out, half %src, half %a) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16_oeq_with_fabs:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -1544,12 +1544,12 @@ define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(i32 addrspace(1)* %out, half
 ; GISEL-GFX10-NEXT: s_endpgm
   %temp = call half @llvm.fabs.f16(half %a)
   %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half %temp, i32 1)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(i32 addrspace(1)* %out, half %src, half %a) {
+define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(ptr addrspace(1) %out, half %src, half %a) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -1607,11 +1607,11 @@ define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(i32 addrspace(
   %temp = call half @llvm.fabs.f16(half %a)
   %src_input = call half @llvm.fabs.f16(half %src)
   %result = call i32 @llvm.amdgcn.fcmp.f16(half %src_input, half %temp, i32 1)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16(i32 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16(ptr addrspace(1) %out, half %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_endpgm
@@ -1637,12 +1637,12 @@ define amdgpu_kernel void @v_fcmp_f16(i32 addrspace(1)* %out, half %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v0, v0, s[0:1]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 -1)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_oeq(i32 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_oeq(ptr addrspace(1) %out, half %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16_oeq:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -1694,12 +1694,12 @@ define amdgpu_kernel void @v_fcmp_f16_oeq(i32 addrspace(1)* %out, half %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 1)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_one(i32 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_one(ptr addrspace(1) %out, half %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16_one:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -1751,12 +1751,12 @@ define amdgpu_kernel void @v_fcmp_f16_one(i32 addrspace(1)* %out, half %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 6)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_ogt(i32 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_ogt(ptr addrspace(1) %out, half %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16_ogt:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -1808,12 +1808,12 @@ define amdgpu_kernel void @v_fcmp_f16_ogt(i32 addrspace(1)* %out, half %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 2)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_oge(i32 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_oge(ptr addrspace(1) %out, half %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16_oge:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -1865,12 +1865,12 @@ define amdgpu_kernel void @v_fcmp_f16_oge(i32 addrspace(1)* %out, half %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 3)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_olt(i32 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_olt(ptr addrspace(1) %out, half %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16_olt:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -1922,12 +1922,12 @@ define amdgpu_kernel void @v_fcmp_f16_olt(i32 addrspace(1)* %out, half %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 4)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_ole(i32 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_ole(ptr addrspace(1) %out, half %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16_ole:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -1979,12 +1979,12 @@ define amdgpu_kernel void @v_fcmp_f16_ole(i32 addrspace(1)* %out, half %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 5)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_ueq(i32 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_ueq(ptr addrspace(1) %out, half %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16_ueq:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -2036,12 +2036,12 @@ define amdgpu_kernel void @v_fcmp_f16_ueq(i32 addrspace(1)* %out, half %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 9)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_une(i32 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_une(ptr addrspace(1) %out, half %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16_une:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -2093,12 +2093,12 @@ define amdgpu_kernel void @v_fcmp_f16_une(i32 addrspace(1)* %out, half %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 14)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_ugt(i32 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_ugt(ptr addrspace(1) %out, half %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16_ugt:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -2150,12 +2150,12 @@ define amdgpu_kernel void @v_fcmp_f16_ugt(i32 addrspace(1)* %out, half %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 10)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_uge(i32 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_uge(ptr addrspace(1) %out, half %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16_uge:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -2207,12 +2207,12 @@ define amdgpu_kernel void @v_fcmp_f16_uge(i32 addrspace(1)* %out, half %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 11)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_ult(i32 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_ult(ptr addrspace(1) %out, half %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16_ult:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -2264,12 +2264,12 @@ define amdgpu_kernel void @v_fcmp_f16_ult(i32 addrspace(1)* %out, half %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 12)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_ule(i32 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_ule(ptr addrspace(1) %out, half %src) {
 ; SDAG-GFX11-LABEL: v_fcmp_f16_ule:
 ; SDAG-GFX11: ; %bb.0:
 ; SDAG-GFX11-NEXT: s_clause 0x1
@@ -2321,7 +2321,7 @@ define amdgpu_kernel void @v_fcmp_f16_ule(i32 addrspace(1)* %out, half %src) {
 ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3]
 ; GISEL-GFX10-NEXT: s_endpgm
   %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 13)
-  store i32 %result, i32 addrspace(1)* %out
+  store i32 %result, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
index d809391c32aa8c..9b7fff3a7ebb49 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
@@ -14,7 +14,7 @@ declare float @llvm.fabs.f32(float) #0
 declare i64 @llvm.amdgcn.fcmp.f16(half, half, i32) #0
 declare half @llvm.fabs.f16(half) #0
-define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
+define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(ptr addrspace(1) %out, float %src, float %a) {
 ; GFX11-LABEL: v_fcmp_f32_oeq_with_fabs:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -66,11 +66,11 @@ define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(i64 addrspace(1)* %out, floa
 ; SDAG-VI-NEXT: s_endpgm
   %temp = call float @llvm.fabs.f32(float %a)
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float %temp, i32 1)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, float %src, float %a) {
+define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(ptr addrspace(1) %out, float %src, float %a) {
 ; GFX11-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -123,11 +123,11 @@ define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(i64 addrspace(
   %temp = call float @llvm.fabs.f32(float %a)
   %src_input = call float @llvm.fabs.f32(float %src)
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src_input, float %temp, i32 1)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32(i64 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32(ptr addrspace(1) %out, float %src) {
 ; SDAG-GFX-LABEL: v_fcmp_f32:
 ; SDAG-GFX: ; %bb.0:
 ; SDAG-GFX-NEXT: s_endpgm
@@ -177,11 +177,11 @@ define amdgpu_kernel void @v_fcmp_f32(i64 addrspace(1)* %out, float %src) {
 ; GISEL-GFX-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GISEL-GFX-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 -1)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_oeq(i64 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_oeq(ptr addrspace(1) %out, float %src) {
 ; GFX11-LABEL: v_fcmp_f32_oeq:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -237,11 +237,11 @@ define amdgpu_kernel void @v_fcmp_f32_oeq(i64 addrspace(1)* %out, float %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 1)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_one(i64 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_one(ptr addrspace(1) %out, float %src) {
 ; GFX11-LABEL: v_fcmp_f32_one:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -297,11 +297,11 @@ define amdgpu_kernel void @v_fcmp_f32_one(i64 addrspace(1)* %out, float %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 6)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_ogt(i64 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_ogt(ptr addrspace(1) %out, float %src) {
 ; GFX11-LABEL: v_fcmp_f32_ogt:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -357,11 +357,11 @@ define amdgpu_kernel void @v_fcmp_f32_ogt(i64 addrspace(1)* %out, float %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 2)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_oge(i64 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_oge(ptr addrspace(1) %out, float %src) {
 ; GFX11-LABEL: v_fcmp_f32_oge:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -417,11 +417,11 @@ define amdgpu_kernel void @v_fcmp_f32_oge(i64 addrspace(1)* %out, float %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 3)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_olt(i64 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_olt(ptr addrspace(1) %out, float %src) {
 ; GFX11-LABEL: v_fcmp_f32_olt:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -477,11 +477,11 @@ define amdgpu_kernel void @v_fcmp_f32_olt(i64 addrspace(1)* %out, float %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 4)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_ole(i64 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_ole(ptr addrspace(1) %out, float %src) {
 ; GFX11-LABEL: v_fcmp_f32_ole:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -537,12 +537,12 @@ define amdgpu_kernel void @v_fcmp_f32_ole(i64 addrspace(1)* %out, float %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 5)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_ueq(i64 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_ueq(ptr addrspace(1) %out, float %src) {
 ; GFX11-LABEL: v_fcmp_f32_ueq:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -598,11 +598,11 @@ define amdgpu_kernel void @v_fcmp_f32_ueq(i64 addrspace(1)* %out, float %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 9)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_une(i64 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_une(ptr addrspace(1) %out, float %src) {
 ; GFX11-LABEL: v_fcmp_f32_une:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -658,11 +658,11 @@ define amdgpu_kernel void @v_fcmp_f32_une(i64 addrspace(1)* %out, float %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 14)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_ugt(i64 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_ugt(ptr addrspace(1) %out, float %src) {
 ; GFX11-LABEL: v_fcmp_f32_ugt:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -718,11 +718,11 @@ define amdgpu_kernel void @v_fcmp_f32_ugt(i64 addrspace(1)* %out, float %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 10)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_uge(i64 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_uge(ptr addrspace(1) %out, float %src) {
 ; GFX11-LABEL: v_fcmp_f32_uge:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -778,11 +778,11 @@ define amdgpu_kernel void @v_fcmp_f32_uge(i64 addrspace(1)* %out, float %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 11)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_ult(i64 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_ult(ptr addrspace(1) %out, float %src) {
 ; GFX11-LABEL: v_fcmp_f32_ult:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -838,11 +838,11 @@ define amdgpu_kernel void @v_fcmp_f32_ult(i64 addrspace(1)* %out, float %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 12)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f32_ule(i64 addrspace(1)* %out, float %src) {
+define amdgpu_kernel void @v_fcmp_f32_ule(ptr addrspace(1) %out, float %src) {
 ; GFX11-LABEL: v_fcmp_f32_ule:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -898,11 +898,11 @@ define amdgpu_kernel void @v_fcmp_f32_ule(i64 addrspace(1)* %out, float %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 13)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_oeq(i64 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_oeq(ptr addrspace(1) %out, double %src) {
 ; GFX11-LABEL: v_fcmp_f64_oeq:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -973,11 +973,11 @@ define amdgpu_kernel void @v_fcmp_f64_oeq(i64 addrspace(1)* %out, double %src) {
 ; SDAG-GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
 ; SDAG-GFX9-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 1)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_one(i64 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_one(ptr addrspace(1) %out, double %src) {
 ; GFX11-LABEL: v_fcmp_f64_one:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1048,11 +1048,11 @@ define amdgpu_kernel void @v_fcmp_f64_one(i64 addrspace(1)* %out, double %src) {
 ; SDAG-GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
 ; SDAG-GFX9-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 6)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_ogt(i64 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_ogt(ptr addrspace(1) %out, double %src) {
 ; GFX11-LABEL: v_fcmp_f64_ogt:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1123,11 +1123,11 @@ define amdgpu_kernel void @v_fcmp_f64_ogt(i64 addrspace(1)* %out, double %src) {
 ; SDAG-GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
 ; SDAG-GFX9-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 2)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_oge(i64 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_oge(ptr addrspace(1) %out, double %src) {
 ; GFX11-LABEL: v_fcmp_f64_oge:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1198,11 +1198,11 @@ define amdgpu_kernel void @v_fcmp_f64_oge(i64 addrspace(1)* %out, double %src) {
 ; SDAG-GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
 ; SDAG-GFX9-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 3)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_olt(i64 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_olt(ptr addrspace(1) %out, double %src) {
 ; GFX11-LABEL: v_fcmp_f64_olt:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1273,11 +1273,11 @@ define amdgpu_kernel void @v_fcmp_f64_olt(i64 addrspace(1)* %out, double %src) {
 ; SDAG-GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
 ; SDAG-GFX9-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 4)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_ole(i64 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_ole(ptr addrspace(1) %out, double %src) {
 ; GFX11-LABEL: v_fcmp_f64_ole:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1348,11 +1348,11 @@ define amdgpu_kernel void @v_fcmp_f64_ole(i64 addrspace(1)* %out, double %src) {
 ; SDAG-GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
 ; SDAG-GFX9-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 5)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_ueq(i64 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) {
 ; GFX11-LABEL: v_fcmp_f64_ueq:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1423,11 +1423,11 @@ define amdgpu_kernel void @v_fcmp_f64_ueq(i64 addrspace(1)* %out, double %src) {
 ; SDAG-GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
 ; SDAG-GFX9-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 9)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_une(i64 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) {
 ; GFX11-LABEL: v_fcmp_f64_une:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1498,11 +1498,11 @@ define amdgpu_kernel void @v_fcmp_f64_une(i64 addrspace(1)* %out, double %src) {
 ; SDAG-GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
 ; SDAG-GFX9-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 14)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_ugt(i64 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_ugt(ptr addrspace(1) %out, double %src) {
 ; GFX11-LABEL: v_fcmp_f64_ugt:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1573,11 +1573,11 @@ define amdgpu_kernel void @v_fcmp_f64_ugt(i64 addrspace(1)* %out, double %src) {
 ; SDAG-GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
 ; SDAG-GFX9-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 10)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_uge(i64 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_uge(ptr addrspace(1) %out, double %src) {
 ; GFX11-LABEL: v_fcmp_f64_uge:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1648,11 +1648,11 @@ define amdgpu_kernel void @v_fcmp_f64_uge(i64 addrspace(1)* %out, double %src) {
 ; SDAG-GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
 ; SDAG-GFX9-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 11)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_ult(i64 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_ult(ptr addrspace(1) %out, double %src) {
 ; GFX11-LABEL: v_fcmp_f64_ult:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1723,11 +1723,11 @@ define amdgpu_kernel void @v_fcmp_f64_ult(i64 addrspace(1)* %out, double %src) {
 ; SDAG-GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
 ; SDAG-GFX9-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 12)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f64_ule(i64 addrspace(1)* %out, double %src) {
+define amdgpu_kernel void @v_fcmp_f64_ule(ptr addrspace(1) %out, double %src) {
 ; GFX11-LABEL: v_fcmp_f64_ule:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
@@ -1798,12 +1798,12 @@ define amdgpu_kernel void @v_fcmp_f64_ule(i64 addrspace(1)* %out, double %src) {
 ; SDAG-GFX9-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1]
 ; SDAG-GFX9-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 13)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(i64 addrspace(1)* %out, half %src, half %a) {
+define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(ptr addrspace(1) %out, half %src, half %a) {
 ; GFX11-LABEL: v_fcmp_f16_oeq_with_fabs:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -1865,12 +1865,12 @@ define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(i64 addrspace(1)* %out, half
 ; SDAG-VI-NEXT: s_endpgm
   %temp = call half @llvm.fabs.f16(half %a)
   %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half %temp, i32 1)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(i64 addrspace(1)* %out, half %src, half %a) {
+define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(ptr addrspace(1) %out, half %src, half %a) {
 ; GFX11-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -1933,11 +1933,11 @@ define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(i64 addrspace(
   %temp = call half @llvm.fabs.f16(half %a)
   %src_input = call half @llvm.fabs.f16(half %src)
   %result = call i64 @llvm.amdgcn.fcmp.f16(half %src_input, half %temp, i32 1)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16(i64 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16(ptr addrspace(1) %out, half %src) {
 ; SDAG-GFX-LABEL: v_fcmp_f16:
 ; SDAG-GFX: ; %bb.0:
 ; SDAG-GFX-NEXT: s_endpgm
@@ -1987,12 +1987,12 @@ define amdgpu_kernel void @v_fcmp_f16(i64 addrspace(1)* %out, half %src) {
 ; GISEL-GFX-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GISEL-GFX-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 -1)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_oeq(i64 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_oeq(ptr addrspace(1) %out, half %src) {
 ; GFX11-LABEL: v_fcmp_f16_oeq:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -2048,12 +2048,12 @@ define amdgpu_kernel void @v_fcmp_f16_oeq(i64 addrspace(1)* %out, half %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 1)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_one(i64 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_one(ptr addrspace(1) %out, half %src) {
 ; GFX11-LABEL: v_fcmp_f16_one:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -2109,12 +2109,12 @@ define amdgpu_kernel void @v_fcmp_f16_one(i64 addrspace(1)* %out, half %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 6)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_ogt(i64 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_ogt(ptr addrspace(1) %out, half %src) {
 ; GFX11-LABEL: v_fcmp_f16_ogt:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -2170,12 +2170,12 @@ define amdgpu_kernel void @v_fcmp_f16_ogt(i64 addrspace(1)* %out, half %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 2)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_oge(i64 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_oge(ptr addrspace(1) %out, half %src) {
 ; GFX11-LABEL: v_fcmp_f16_oge:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -2231,12 +2231,12 @@ define amdgpu_kernel void @v_fcmp_f16_oge(i64 addrspace(1)* %out, half %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 3)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_olt(i64 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_olt(ptr addrspace(1) %out, half %src) {
 ; GFX11-LABEL: v_fcmp_f16_olt:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -2292,12 +2292,12 @@ define amdgpu_kernel void @v_fcmp_f16_olt(i64 addrspace(1)* %out, half %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 4)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_ole(i64 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_ole(ptr addrspace(1) %out, half %src) {
 ; GFX11-LABEL: v_fcmp_f16_ole:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -2353,12 +2353,12 @@ define amdgpu_kernel void @v_fcmp_f16_ole(i64 addrspace(1)* %out, half %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 5)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_ueq(i64 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_ueq(ptr addrspace(1) %out, half %src) {
 ; GFX11-LABEL: v_fcmp_f16_ueq:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -2414,12 +2414,12 @@ define amdgpu_kernel void @v_fcmp_f16_ueq(i64 addrspace(1)* %out, half %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 9)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_une(i64 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_une(ptr addrspace(1) %out, half %src) {
 ; GFX11-LABEL: v_fcmp_f16_une:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -2475,12 +2475,12 @@ define amdgpu_kernel void @v_fcmp_f16_une(i64 addrspace(1)* %out, half %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 14)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_ugt(i64 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_ugt(ptr addrspace(1) %out, half %src) {
 ; GFX11-LABEL: v_fcmp_f16_ugt:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -2536,12 +2536,12 @@ define amdgpu_kernel void @v_fcmp_f16_ugt(i64 addrspace(1)* %out, half %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 10)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_uge(i64 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_uge(ptr addrspace(1) %out, half %src) {
 ; GFX11-LABEL: v_fcmp_f16_uge:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -2597,12 +2597,12 @@ define amdgpu_kernel void @v_fcmp_f16_uge(i64 addrspace(1)* %out, half %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 11)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_ult(i64 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_ult(ptr addrspace(1) %out, half %src) {
 ; GFX11-LABEL: v_fcmp_f16_ult:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -2658,12 +2658,12 @@ define amdgpu_kernel void @v_fcmp_f16_ult(i64 addrspace(1)* %out, half %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 12)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
-define amdgpu_kernel void @v_fcmp_f16_ule(i64 addrspace(1)* %out, half %src) {
+define amdgpu_kernel void @v_fcmp_f16_ule(ptr addrspace(1) %out, half %src) {
 ; GFX11-LABEL: v_fcmp_f16_ule:
 ; GFX11: ; %bb.0:
 ; GFX11-NEXT: s_clause 0x1
@@ -2719,7 +2719,7 @@ define amdgpu_kernel void @v_fcmp_f16_ule(i64 addrspace(1)* %out, half %src) {
 ; SDAG-VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
 ; SDAG-VI-NEXT: s_endpgm
   %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 13)
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll
index 248ee9904da030..d3ec46fcf7d480 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll @@ -8,9 +8,9 @@ declare float @llvm.amdgcn.fdiv.fast(float, float) #0 ; CHECK: v_rcp_f32_e32 ; CHECK: v_mul_f32_e32 ; CHECK: v_mul_f32_e32 -define amdgpu_kernel void @test_fdiv_fast(float addrspace(1)* %out, float %a, float %b) #1 { +define amdgpu_kernel void @test_fdiv_fast(ptr addrspace(1) %out, float %a, float %b) #1 { %fdiv = call float @llvm.amdgcn.fdiv.fast(float %a, float %b) - store float %fdiv, float addrspace(1)* %out + store float %fdiv, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll index 295684a77e972d..b5d5be5fa6555a 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll @@ -18,16 +18,16 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_bf16_bf16( ; GFX11-NEXT: global_store_b16 v0, v1, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - i16 addrspace(1)* %r, - <2 x i16> addrspace(1)* %a, - <2 x i16> addrspace(1)* %b, - i16 addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { entry: - %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a - %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b - %c.val = load i16, i16 addrspace(1)* %c + %a.val = load <2 x i16>, ptr addrspace(1) %a + %b.val = load <2 x i16>, ptr addrspace(1) %b + %c.val = load i16, ptr addrspace(1) %c %r.val = call i16 @llvm.amdgcn.fdot2.bf16.bf16(<2 x i16> %a.val, <2 x i16> %b.val, i16 %c.val) - store i16 %r.val, i16 addrspace(1)* %r + store i16 %r.val, ptr addrspace(1) %r ret void } @@ -57,19 +57,19 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_bf16_bf16_dpp( ; GISEL-GFX11-NEXT: scratch_store_b16 off, v0, s0 ; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GISEL-GFX11-NEXT: s_endpgm - i16 addrspace(5)* %r, - <2 x i16> addrspace(5)* %a, - <2 x i16> addrspace(5)* %b, - i16 addrspace(5)* %c) { + ptr addrspace(5) %r, + ptr addrspace(5) %a, + ptr addrspace(5) %b, + ptr addrspace(5) %c) { entry: - %a.val = load <2 x i16>, <2 x i16> addrspace(5)* %a - %b.val = load <2 x i16>, <2 x i16> addrspace(5)* %b - %c.val = load i16, i16 addrspace(5)* %c + %a.val = load <2 x i16>, ptr addrspace(5) %a + %b.val = load <2 x i16>, ptr addrspace(5) %b + %c.val = load i16, ptr addrspace(5) %c %a.val.i32 = bitcast <2 x i16> %a.val to i32 %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 %a.val.i32, i32 %a.val.i32, i32 1, i32 15, i32 15, i1 1) %a.val.dpp.v2i16 = bitcast i32 %dpp to <2 x i16> %r.val = call i16 @llvm.amdgcn.fdot2.bf16.bf16(<2 x i16> %a.val.dpp.v2i16, <2 x i16> %b.val, i16 %c.val) - store i16 %r.val, i16 addrspace(5)* %r + store i16 %r.val, ptr addrspace(5) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll index 1be7a0fc7d4710..dde7df0f794803 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll @@ -18,16 +18,16 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f16_f16( ; GFX11-NEXT: global_store_b16 v0, v1, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - half addrspace(1)* %r, - <2 x half> addrspace(1)* %a, - <2 x half> addrspace(1)* %b, - half addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) 
%b, + ptr addrspace(1) %c) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b - %c.val = load half, half addrspace(1)* %c + %a.val = load <2 x half>, ptr addrspace(1) %a + %b.val = load <2 x half>, ptr addrspace(1) %b + %c.val = load half, ptr addrspace(1) %c %r.val = call half @llvm.amdgcn.fdot2.f16.f16(<2 x half> %a.val, <2 x half> %b.val, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -57,19 +57,19 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f16_f16_dpp( ; GISEL-GFX11-NEXT: scratch_store_b16 off, v0, s0 ; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GISEL-GFX11-NEXT: s_endpgm - half addrspace(5)* %r, - <2 x half> addrspace(5)* %a, - <2 x half> addrspace(5)* %b, - half addrspace(5)* %c) { + ptr addrspace(5) %r, + ptr addrspace(5) %a, + ptr addrspace(5) %b, + ptr addrspace(5) %c) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(5)* %a - %b.val = load <2 x half>, <2 x half> addrspace(5)* %b - %c.val = load half, half addrspace(5)* %c + %a.val = load <2 x half>, ptr addrspace(5) %a + %b.val = load <2 x half>, ptr addrspace(5) %b + %c.val = load half, ptr addrspace(5) %c %a.val.i32 = bitcast <2 x half> %a.val to i32 %dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 %a.val.i32, i32 %a.val.i32, i32 1, i32 15, i32 15, i1 1) %a.val.dpp.v2half = bitcast i32 %dpp to <2 x half> %r.val = call half @llvm.amdgcn.fdot2.f16.f16(<2 x half> %a.val.dpp.v2half, <2 x half> %b.val, half %c.val) - store half %r.val, half addrspace(5)* %r + store half %r.val, ptr addrspace(5) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll index 516e86828bd424..8276df236d8a43 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll @@ -19,16 +19,16 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f32_bf16_clamp( ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - float addrspace(1)* %r, - <2 x i16> addrspace(1)* %a, - <2 x i16> addrspace(1)* %b, - float addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { entry: - %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a - %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b - %c.val = load float, float addrspace(1)* %c + %a.val = load <2 x i16>, ptr addrspace(1) %a + %b.val = load <2 x i16>, ptr addrspace(1) %b + %c.val = load float, ptr addrspace(1) %c %r.val = call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %a.val, <2 x i16> %b.val, float %c.val, i1 1) - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } @@ -48,15 +48,15 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f32_bf16_no_clamp( ; GFX11-NEXT: global_store_b32 v1, v0, s[0:1] ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - float addrspace(1)* %r, - <2 x i16> addrspace(1)* %a, - <2 x i16> addrspace(1)* %b, - float addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { entry: - %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a - %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b - %c.val = load float, float addrspace(1)* %c + %a.val = load <2 x i16>, ptr addrspace(1) %a + %b.val = load <2 x i16>, ptr addrspace(1) %b + %c.val = load float, 
ptr addrspace(1) %c %r.val = call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %a.val, <2 x i16> %b.val, float %c.val, i1 0) - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll index 2425fbb1896257..3ced3765b91436 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll @@ -10,16 +10,16 @@ declare float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 %cla ; GFX9: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} ; GFX10: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} define amdgpu_kernel void @test_llvm_amdgcn_fdot2_clamp( - float addrspace(1)* %r, - <2 x half> addrspace(1)* %a, - <2 x half> addrspace(1)* %b, - float addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b - %c.val = load float, float addrspace(1)* %c + %a.val = load <2 x half>, ptr addrspace(1) %a + %b.val = load <2 x half>, ptr addrspace(1) %b + %c.val = load float, ptr addrspace(1) %c %r.val = call float @llvm.amdgcn.fdot2(<2 x half> %a.val, <2 x half> %b.val, float %c.val, i1 1) - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } @@ -28,16 +28,16 @@ entry: ; GFX940: v_dot2c_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} ; GFX10: {{v_dot2c_f32_f16_e32|v_dot2acc_f32_f16}} v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @test_llvm_amdgcn_fdot2_no_clamp( - float addrspace(1)* %r, - <2 x half> addrspace(1)* %a, - <2 x half> addrspace(1)* %b, - float addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { entry: - %a.val = load <2 x half>, <2 x half> addrspace(1)* %a - %b.val = load <2 x half>, <2 x half> addrspace(1)* %b - %c.val = load float, float addrspace(1)* %c + %a.val = load <2 x half>, ptr addrspace(1) %a + %b.val = load <2 x half>, ptr addrspace(1) %b + %c.val = load float, ptr addrspace(1) %c %r.val = call float @llvm.amdgcn.fdot2(<2 x half> %a.val, <2 x half> %b.val, float %c.val, i1 0) - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll index cd9c47a57c5227..226670a550014c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll @@ -7,54 +7,54 @@ declare half @llvm.amdgcn.fmad.ftz.f16(half %a, half %b, half %c) ; GCN-LABEL: {{^}}mad_f16: ; GCN: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+$}} define amdgpu_kernel void @mad_f16( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b, - half addrspace(1)* %c) { - %a.val = load half, half addrspace(1)* %a - %b.val = load half, half addrspace(1)* %b - %c.val = load half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load half, ptr addrspace(1) %a + %b.val = load half, ptr addrspace(1) %b + %c.val = load half, ptr addrspace(1) %c %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %b.val, half %c.val) - store half %r.val, half addrspace(1)* %r + 
store half %r.val, ptr addrspace(1) %r ret void } ; GCN-LABEL: {{^}}mad_f16_imm_a: ; GCN: v_madmk_f16 {{v[0-9]+}}, {{v[0-9]+}}, 0x4800, {{v[0-9]+}} define amdgpu_kernel void @mad_f16_imm_a( - half addrspace(1)* %r, - half addrspace(1)* %b, - half addrspace(1)* %c) { - %b.val = load half, half addrspace(1)* %b - %c.val = load half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %b.val = load half, ptr addrspace(1) %b + %c.val = load half, ptr addrspace(1) %c %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half 8.0, half %b.val, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } ; GCN-LABEL: {{^}}mad_f16_imm_b: ; GCN: v_mac_f16_e32 {{v[0-9]+}}, 0x4800, {{v[0-9]+$}} define amdgpu_kernel void @mad_f16_imm_b( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %c) { - %a.val = load half, half addrspace(1)* %a - %c.val = load half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %c) { + %a.val = load half, ptr addrspace(1) %a + %c.val = load half, ptr addrspace(1) %c %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half 8.0, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } ; GCN-LABEL: {{^}}mad_f16_imm_c: ; GCN: v_madak_f16 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 0x4800{{$}} define amdgpu_kernel void @mad_f16_imm_c( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b) { - %a.val = load half, half addrspace(1)* %a - %b.val = load half, half addrspace(1)* %b + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) { + %a.val = load half, ptr addrspace(1) %a + %b.val = load half, ptr addrspace(1) %b %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %b.val, half 8.0) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -62,16 +62,16 @@ define amdgpu_kernel void @mad_f16_imm_c( ; GFX8: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}} ; GFX9: v_mad_legacy_f16 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @mad_f16_neg_b( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b, - half addrspace(1)* %c) { - %a.val = load half, half addrspace(1)* %a - %b.val = load half, half addrspace(1)* %b - %c.val = load half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load half, ptr addrspace(1) %a + %b.val = load half, ptr addrspace(1) %b + %c.val = load half, ptr addrspace(1) %c %neg.b = fsub half -0.0, %b.val %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %neg.b, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -79,16 +79,16 @@ define amdgpu_kernel void @mad_f16_neg_b( ; GFX8: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} ; GFX9: v_mad_legacy_f16 v{{[0-9]+}}, v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} define amdgpu_kernel void @mad_f16_abs_b( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b, - half addrspace(1)* %c) { - %a.val = load half, half addrspace(1)* %a - %b.val = load half, half addrspace(1)* %b - %c.val = load half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load half, ptr addrspace(1) %a + %b.val = load half, ptr addrspace(1) %b + %c.val = load 
half, ptr addrspace(1) %c %abs.b = call half @llvm.fabs.f16(half %b.val) %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %abs.b, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -96,17 +96,17 @@ define amdgpu_kernel void @mad_f16_abs_b( ; GFX8: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} ; GFX9: v_mad_legacy_f16 v{{[0-9]+}}, v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} define amdgpu_kernel void @mad_f16_neg_abs_b( - half addrspace(1)* %r, - half addrspace(1)* %a, - half addrspace(1)* %b, - half addrspace(1)* %c) { - %a.val = load half, half addrspace(1)* %a - %b.val = load half, half addrspace(1)* %b - %c.val = load half, half addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load half, ptr addrspace(1) %a + %b.val = load half, ptr addrspace(1) %b + %c.val = load half, ptr addrspace(1) %c %abs.b = call half @llvm.fabs.f16(half %b.val) %neg.abs.b = fsub half -0.0, %abs.b %r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %neg.abs.b, half %c.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll index 7c4608bf55d02c..53f12c88eb21c6 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll @@ -8,28 +8,28 @@ declare float @llvm.amdgcn.fmad.ftz.f32(float %a, float %b, float %c) ; GCN-LABEL: {{^}}mad_f32: ; GCN: v_ma{{[dc]}}_f32 define amdgpu_kernel void @mad_f32( - float addrspace(1)* %r, - float addrspace(1)* %a, - float addrspace(1)* %b, - float addrspace(1)* %c) { - %a.val = load float, float addrspace(1)* %a - %b.val = load float, float addrspace(1)* %b - %c.val = load float, float addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load float, ptr addrspace(1) %a + %b.val = load float, ptr addrspace(1) %b + %c.val = load float, ptr addrspace(1) %c %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %b.val, float %c.val) - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } ; GCN-LABEL: {{^}}mad_f32_imm_a: ; GCN: v_madmk_f32 {{v[0-9]+}}, {{v[0-9]+}}, 0x41000000, define amdgpu_kernel void @mad_f32_imm_a( - float addrspace(1)* %r, - float addrspace(1)* %b, - float addrspace(1)* %c) { - %b.val = load float, float addrspace(1)* %b - %c.val = load float, float addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %b.val = load float, ptr addrspace(1) %b + %c.val = load float, ptr addrspace(1) %c %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float 8.0, float %b.val, float %c.val) - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } @@ -37,13 +37,13 @@ define amdgpu_kernel void @mad_f32_imm_a( ; GCN: v_mov_b32_e32 [[KB:v[0-9]+]], 0x41000000 ; GCN: v_mac_f32_e32 {{v[0-9]+}}, {{[s][0-9]+}}, [[KB]] define amdgpu_kernel void @mad_f32_imm_b( - float addrspace(1)* %r, - float addrspace(1)* %a, - float addrspace(1)* %c) { - %a.val = load float, float addrspace(1)* %a - %c.val = load float, float addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %c) { + %a.val = load float, ptr addrspace(1) %a + %c.val = load float, ptr addrspace(1) %c %r.val = call float 
@llvm.amdgcn.fmad.ftz.f32(float %a.val, float 8.0, float %c.val) - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } @@ -54,62 +54,62 @@ define amdgpu_kernel void @mad_f32_imm_b( ; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]] ; GCN: v_mac_f32_e32 [[C]], {{s[0-9]+}}, [[VB]]{{$}} define amdgpu_kernel void @mad_f32_imm_c( - float addrspace(1)* %r, - float addrspace(1)* %a, - float addrspace(1)* %b) { - %a.val = load float, float addrspace(1)* %a - %b.val = load float, float addrspace(1)* %b + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b) { + %a.val = load float, ptr addrspace(1) %a + %b.val = load float, ptr addrspace(1) %b %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %b.val, float 8.0) - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } ; GCN-LABEL: {{^}}mad_f32_neg_b: ; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}} define amdgpu_kernel void @mad_f32_neg_b( - float addrspace(1)* %r, - float addrspace(1)* %a, - float addrspace(1)* %b, - float addrspace(1)* %c) { - %a.val = load float, float addrspace(1)* %a - %b.val = load float, float addrspace(1)* %b - %c.val = load float, float addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load float, ptr addrspace(1) %a + %b.val = load float, ptr addrspace(1) %b + %c.val = load float, ptr addrspace(1) %c %neg.b = fneg float %b.val %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.b, float %c.val) - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } ; GCN-LABEL: {{^}}mad_f32_abs_b: ; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}} define amdgpu_kernel void @mad_f32_abs_b( - float addrspace(1)* %r, - float addrspace(1)* %a, - float addrspace(1)* %b, - float addrspace(1)* %c) { - %a.val = load float, float addrspace(1)* %a - %b.val = load float, float addrspace(1)* %b - %c.val = load float, float addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load float, ptr addrspace(1) %a + %b.val = load float, ptr addrspace(1) %b + %c.val = load float, ptr addrspace(1) %c %abs.b = call float @llvm.fabs.f32(float %b.val) %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %abs.b, float %c.val) - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } ; GCN-LABEL: {{^}}mad_f32_neg_abs_b: ; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}} define amdgpu_kernel void @mad_f32_neg_abs_b( - float addrspace(1)* %r, - float addrspace(1)* %a, - float addrspace(1)* %b, - float addrspace(1)* %c) { - %a.val = load float, float addrspace(1)* %a - %b.val = load float, float addrspace(1)* %b - %c.val = load float, float addrspace(1)* %c + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { + %a.val = load float, ptr addrspace(1) %a + %b.val = load float, ptr addrspace(1) %b + %c.val = load float, ptr addrspace(1) %c %abs.b = call float @llvm.fabs.f32(float %b.val) %neg.abs.b = fneg float %abs.b %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.abs.b, float %c.val) - store float %r.val, float addrspace(1)* %r + store float %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll 
index 91d1857f306b21..81e48143caecd2 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll @@ -2,7 +2,7 @@ ; GCN-LABEL: {{^}}test_fmed3_f16: ; GCN: v_med3_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @test_fmed3_f16(half addrspace(1)* %out, i32 %src0.arg, i32 %src1.arg, i32 %src2.arg) #1 { +define amdgpu_kernel void @test_fmed3_f16(ptr addrspace(1) %out, i32 %src0.arg, i32 %src1.arg, i32 %src2.arg) #1 { %src0.f16 = trunc i32 %src0.arg to i16 %src0 = bitcast i16 %src0.f16 to half %src1.f16 = trunc i32 %src1.arg to i16 @@ -10,13 +10,13 @@ define amdgpu_kernel void @test_fmed3_f16(half addrspace(1)* %out, i32 %src0.arg %src2.f16 = trunc i32 %src2.arg to i16 %src2 = bitcast i16 %src2.f16 to half %mad = call half @llvm.amdgcn.fmed3.f16(half %src0, half %src1, half %src2) - store half %mad, half addrspace(1)* %out + store half %mad, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}test_fmed3_srcmods_f16: ; GCN: v_med3_f16 v{{[0-9]+}}, -s{{[0-9]+}}, |v{{[0-9]+}}|, -|v{{[0-9]+}}| -define amdgpu_kernel void @test_fmed3_srcmods_f16(half addrspace(1)* %out, i32 %src0.arg, i32 %src1.arg, i32 %src2.arg) #1 { +define amdgpu_kernel void @test_fmed3_srcmods_f16(ptr addrspace(1) %out, i32 %src0.arg, i32 %src1.arg, i32 %src2.arg) #1 { %src0.f16 = trunc i32 %src0.arg to i16 %src0 = bitcast i16 %src0.f16 to half %src1.f16 = trunc i32 %src1.arg to i16 @@ -28,7 +28,7 @@ define amdgpu_kernel void @test_fmed3_srcmods_f16(half addrspace(1)* %out, i32 % %src2.fabs = call half @llvm.fabs.f16(half %src2) %src2.fneg.fabs = fsub half -0.0, %src2.fabs %mad = call half @llvm.amdgcn.fmed3.f16(half %src0.fneg, half %src1.fabs, half %src2.fneg.fabs) - store half %mad, half addrspace(1)* %out + store half %mad, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll index 05b074bfe2d411..015017c2ec9357 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll @@ -3,62 +3,62 @@ ; GCN-LABEL: {{^}}test_fmed3: ; GCN: v_med3_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @test_fmed3(float addrspace(1)* %out, float %src0, float %src1, float %src2) #1 { +define amdgpu_kernel void @test_fmed3(ptr addrspace(1) %out, float %src0, float %src1, float %src2) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2) - store float %med3, float addrspace(1)* %out + store float %med3, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}test_fmed3_srcmods: ; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, |v{{[0-9]+}}|, -|v{{[0-9]+}}| -define amdgpu_kernel void @test_fmed3_srcmods(float addrspace(1)* %out, float %src0, float %src1, float %src2) #1 { +define amdgpu_kernel void @test_fmed3_srcmods(ptr addrspace(1) %out, float %src0, float %src1, float %src2) #1 { %src0.fneg = fsub float -0.0, %src0 %src1.fabs = call float @llvm.fabs.f32(float %src1) %src2.fabs = call float @llvm.fabs.f32(float %src2) %src2.fneg.fabs = fsub float -0.0, %src2.fabs %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0.fneg, float %src1.fabs, float %src2.fneg.fabs) - store float %med3, float addrspace(1)* %out + store float %med3, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}test_fneg_fmed3: ; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}} -define amdgpu_kernel void @test_fneg_fmed3(float addrspace(1)* %out, float %src0, float %src1, float 
%src2) #1 { +define amdgpu_kernel void @test_fneg_fmed3(ptr addrspace(1) %out, float %src0, float %src1, float %src2) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2) %neg.med3 = fsub float -0.0, %med3 - store float %neg.med3, float addrspace(1)* %out + store float %neg.med3, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}test_fneg_fmed3_multi_use: ; GCN: v_med3_f32 [[MED3:v[0-9]+]], -s{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, -4.0, [[MED3]] -define amdgpu_kernel void @test_fneg_fmed3_multi_use(float addrspace(1)* %out, float %src0, float %src1, float %src2) #1 { +define amdgpu_kernel void @test_fneg_fmed3_multi_use(ptr addrspace(1) %out, float %src0, float %src1, float %src2) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2) %neg.med3 = fsub float -0.0, %med3 %med3.user = fmul float %med3, 4.0 - store volatile float %med3.user, float addrspace(1)* %out - store volatile float %neg.med3, float addrspace(1)* %out + store volatile float %med3.user, ptr addrspace(1) %out + store volatile float %neg.med3, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}test_fabs_fmed3: ; GCN: v_med3_f32 [[MED3:v[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7fffffff, [[MED3]] -define amdgpu_kernel void @test_fabs_fmed3(float addrspace(1)* %out, float %src0, float %src1, float %src2) #1 { +define amdgpu_kernel void @test_fabs_fmed3(ptr addrspace(1) %out, float %src0, float %src1, float %src2) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2) %fabs.med3 = call float @llvm.fabs.f32(float %med3) - store float %fabs.med3, float addrspace(1)* %out + store float %fabs.med3, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}test_fneg_fmed3_rr_0: ; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1 ; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -v{{[0-9]+}}, [[NEG0]] -define amdgpu_kernel void @test_fneg_fmed3_rr_0(float addrspace(1)* %out, float %src0, float %src1) #1 { +define amdgpu_kernel void @test_fneg_fmed3_rr_0(ptr addrspace(1) %out, float %src0, float %src1) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float 0.0) %neg.med3 = fsub float -0.0, %med3 - store float %neg.med3, float addrspace(1)* %out + store float %neg.med3, ptr addrspace(1) %out ret void } @@ -67,11 +67,11 @@ define amdgpu_kernel void @test_fneg_fmed3_rr_0(float addrspace(1)* %out, float ; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1 ; GCN: v_med3_f32 [[MED3:v[0-9]+]], -s{{[0-9]+}}, -v{{[0-9]+}}, [[NEG0]] ; GCN: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[MED3]] -define amdgpu_kernel void @test_fneg_fmed3_rr_0_foldable_user(float addrspace(1)* %out, float %src0, float %src1, float %mul.arg) #1 { +define amdgpu_kernel void @test_fneg_fmed3_rr_0_foldable_user(ptr addrspace(1) %out, float %src0, float %src1, float %mul.arg) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float 0.0) %neg.med3 = fsub float -0.0, %med3 %mul = fmul float %neg.med3, %mul.arg - store float %mul, float addrspace(1)* %out + store float %mul, ptr addrspace(1) %out ret void } @@ -79,10 +79,10 @@ define amdgpu_kernel void @test_fneg_fmed3_rr_0_foldable_user(float addrspace(1) ; GCN-DAG: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1 ; GCN-DAG: v_mov_b32_e32 [[NEG_INV:v[0-9]+]], 0xbe22f983 ; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, [[NEG_INV]], [[NEG0]] -define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0(float addrspace(1)* %out, float %src0) #1 { +define amdgpu_kernel 
void @test_fneg_fmed3_r_inv2pi_0(ptr addrspace(1) %out, float %src0) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float 0x3FC45F3060000000, float 0.0) %neg.med3 = fsub float -0.0, %med3 - store float %neg.med3, float addrspace(1)* %out + store float %neg.med3, ptr addrspace(1) %out ret void } @@ -91,11 +91,11 @@ define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0(float addrspace(1)* %out, ; GCN-DAG: v_mov_b32_e32 [[NEG_INV:v[0-9]+]], 0xbe22f983 ; GCN: v_med3_f32 [[MED3:v[0-9]+]], -s{{[0-9]+}}, [[NEG_INV]], [[NEG0]] ; GCN: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[MED3]] -define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0_foldable_user(float addrspace(1)* %out, float %src0, float %mul.arg) #1 { +define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0_foldable_user(ptr addrspace(1) %out, float %src0, float %mul.arg) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float 0x3FC45F3060000000, float 0.0) %neg.med3 = fsub float -0.0, %med3 %mul = fmul float %neg.med3, %mul.arg - store float %mul, float addrspace(1)* %out + store float %mul, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll index 37637026180f59..e824f8922550b8 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll @@ -9,38 +9,38 @@ ; GCN-LABEL: {{^}}test_mul_legacy_f32: ; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} ; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -define amdgpu_kernel void @test_mul_legacy_f32(float addrspace(1)* %out, float %a, float %b) #0 { +define amdgpu_kernel void @test_mul_legacy_f32(ptr addrspace(1) %out, float %a, float %b) #0 { %result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } ; GCN-LABEL: {{^}}test_mul_legacy_undef0_f32: ; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} ; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -define amdgpu_kernel void @test_mul_legacy_undef0_f32(float addrspace(1)* %out, float %a) #0 { +define amdgpu_kernel void @test_mul_legacy_undef0_f32(ptr addrspace(1) %out, float %a) #0 { %result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } ; GCN-LABEL: {{^}}test_mul_legacy_undef1_f32: ; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} ; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -define amdgpu_kernel void @test_mul_legacy_undef1_f32(float addrspace(1)* %out, float %a) #0 { +define amdgpu_kernel void @test_mul_legacy_undef1_f32(ptr addrspace(1) %out, float %a) #0 { %result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } ; GCN-LABEL: {{^}}test_mul_legacy_fabs_f32: ; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, |s{{[0-9]+}}|, |{{[sv][0-9]+}}| ; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, |s{{[0-9]+}}| -define amdgpu_kernel void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, float %a, float %b) #0 { +define amdgpu_kernel void @test_mul_legacy_fabs_f32(ptr addrspace(1) %out, float %a, float %b) #0 { %a.fabs = call float 
@llvm.fabs.f32(float %a) %b.fabs = call float @llvm.fabs.f32(float %b) %result = call float @llvm.amdgcn.fmul.legacy(float %a.fabs, float %b.fabs) - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } @@ -50,10 +50,10 @@ define amdgpu_kernel void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, fl ; GCN: v_add_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}} ; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GFX11: v_dual_mov_b32 v{{[0-9]+}}, 0 :: v_dual_add_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @test_add_mul_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #0 { +define amdgpu_kernel void @test_add_mul_legacy_f32(ptr addrspace(1) %out, float %a, float %b, float %c) #0 { %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) %add = fadd float %mul, %c - store float %add, float addrspace(1)* %out, align 4 + store float %add, ptr addrspace(1) %out, align 4 ret void } @@ -66,10 +66,10 @@ define amdgpu_kernel void @test_add_mul_legacy_f32(float addrspace(1)* %out, flo ; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} ; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} ; GFX11: v_dual_mov_b32 v{{[0-9]+}}, 0 :: v_dual_add_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %a, float %b, float %c) #2 { +define amdgpu_kernel void @test_mad_legacy_f32(ptr addrspace(1) %out, float %a, float %b, float %c) #2 { %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) %add = fadd float %mul, %c - store float %add, float addrspace(1)* %out, align 4 + store float %add, ptr addrspace(1) %out, align 4 ret void } @@ -80,10 +80,10 @@ define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float % ; GFX101: v_mad_legacy_f32 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}} ; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}} ; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @test_mad_legacy_f32_imm(float addrspace(1)* %out, float %a, float %c) #2 { +define amdgpu_kernel void @test_mad_legacy_f32_imm(ptr addrspace(1) %out, float %a, float %c) #2 { %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float 10.0) %add = fadd float %mul, %c - store float %add, float addrspace(1)* %out, align 4 + store float %add, ptr addrspace(1) %out, align 4 ret void } @@ -93,12 +93,12 @@ define amdgpu_kernel void @test_mad_legacy_f32_imm(float addrspace(1)* %out, flo ; NOMADMACF32: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} ; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}} ; GFX11: v_dual_mov_b32 v{{[0-9]+}}, 0 :: v_dual_add_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_kernel void @test_mad_legacy_fneg_f32(float addrspace(1)* %out, float %a, float %b, float %c) #2 { +define amdgpu_kernel void @test_mad_legacy_fneg_f32(ptr addrspace(1) %out, float %a, float %b, float %c) #2 { %a.fneg = fneg float %a %b.fneg = fneg float %b %mul = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg) %add = fadd float %mul, %c - store float %add, float addrspace(1)* %out, align 4 + store float %add, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.f16.ll index 026f6901fc7f20..61bcf4c85826e2 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.f16.ll +++ 
b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.f16.ll @@ -8,11 +8,11 @@ declare half @llvm.amdgcn.fract.f16(half %a) ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @fract_f16( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.amdgcn.fract.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll index 94dc2f4f68201c..092010bc6c137f 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll @@ -6,26 +6,26 @@ declare double @llvm.amdgcn.fract.f64(double) #0 ; GCN-LABEL: {{^}}v_fract_f32: ; GCN: v_fract_f32_e32 {{v[0-9]+}}, {{s[0-9]+}} -define amdgpu_kernel void @v_fract_f32(float addrspace(1)* %out, float %src) #1 { +define amdgpu_kernel void @v_fract_f32(ptr addrspace(1) %out, float %src) #1 { %fract = call float @llvm.amdgcn.fract.f32(float %src) - store float %fract, float addrspace(1)* %out + store float %fract, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_fract_f64: ; GCN: v_fract_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} -define amdgpu_kernel void @v_fract_f64(double addrspace(1)* %out, double %src) #1 { +define amdgpu_kernel void @v_fract_f64(ptr addrspace(1) %out, double %src) #1 { %fract = call double @llvm.amdgcn.fract.f64(double %src) - store double %fract, double addrspace(1)* %out + store double %fract, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_fract_undef_f32: ; GCN-NOT: v_fract_f32 ; GCN-NOT: store_dword -define amdgpu_kernel void @v_fract_undef_f32(float addrspace(1)* %out) #1 { +define amdgpu_kernel void @v_fract_undef_f32(ptr addrspace(1) %out) #1 { %fract = call float @llvm.amdgcn.fract.f32(float undef) - store float %fract, float addrspace(1)* %out + store float %fract, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll index ee07678e1b3a02..bef73141c1de50 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll @@ -7,12 +7,12 @@ declare i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a) ; VI: v_frexp_exp_i16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]] ; GCN: buffer_store_short v[[R_I16]] define amdgpu_kernel void @frexp_exp_f16( - i16 addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a.val) - store i16 %r.val, i16 addrspace(1)* %r + store i16 %r.val, ptr addrspace(1) %r ret void } @@ -22,13 +22,13 @@ entry: ; VI: v_bfe_i32 v[[R_I32:[0-9]+]], v[[R_I16]], 0, 16{{$}} ; GCN: buffer_store_dword v[[R_I32]] define amdgpu_kernel void @frexp_exp_f16_sext( - i32 addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a.val) %r.val.sext = sext i16 %r.val to i32 - store i32 %r.val.sext, i32 addrspace(1)* %r + store i32 %r.val.sext, ptr addrspace(1) %r ret void } @@ -37,12 +37,12 @@ entry: ; VI: v_frexp_exp_i16_f16_e32 
v[[R_I16:[0-9]+]], v[[A_F16]] ; GCN: buffer_store_dword v[[R_I16]] define amdgpu_kernel void @frexp_exp_f16_zext( - i32 addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a.val) %r.val.zext = zext i16 %r.val to i32 - store i32 %r.val.zext, i32 addrspace(1)* %r + store i32 %r.val.zext, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll index 0d686147caf81f..d821def2fc9857 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll @@ -8,55 +8,55 @@ declare i32 @llvm.amdgcn.frexp.exp.i32.f64(double) #0 ; GCN-LABEL: {{^}}s_test_frexp_exp_f32: ; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}} -define amdgpu_kernel void @s_test_frexp_exp_f32(i32 addrspace(1)* %out, float %src) #1 { +define amdgpu_kernel void @s_test_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 { %frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %src) - store i32 %frexp.exp, i32 addrspace(1)* %out + store i32 %frexp.exp, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f32: ; GCN: v_frexp_exp_i32_f32_e64 {{v[0-9]+}}, |{{s[0-9]+}}| -define amdgpu_kernel void @s_test_fabs_frexp_exp_f32(i32 addrspace(1)* %out, float %src) #1 { +define amdgpu_kernel void @s_test_fabs_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 { %fabs.src = call float @llvm.fabs.f32(float %src) %frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %fabs.src) - store i32 %frexp.exp, i32 addrspace(1)* %out + store i32 %frexp.exp, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f32: ; GCN: v_frexp_exp_i32_f32_e64 {{v[0-9]+}}, -|{{s[0-9]+}}| -define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f32(i32 addrspace(1)* %out, float %src) #1 { +define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 { %fabs.src = call float @llvm.fabs.f32(float %src) %fneg.fabs.src = fsub float -0.0, %fabs.src %frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %fneg.fabs.src) - store i32 %frexp.exp, i32 addrspace(1)* %out + store i32 %frexp.exp, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}s_test_frexp_exp_f64: ; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}} -define amdgpu_kernel void @s_test_frexp_exp_f64(i32 addrspace(1)* %out, double %src) #1 { +define amdgpu_kernel void @s_test_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 { %frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double %src) - store i32 %frexp.exp, i32 addrspace(1)* %out + store i32 %frexp.exp, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f64: ; GCN: v_frexp_exp_i32_f64_e64 {{v[0-9]+}}, |{{s\[[0-9]+:[0-9]+\]}}| -define amdgpu_kernel void @s_test_fabs_frexp_exp_f64(i32 addrspace(1)* %out, double %src) #1 { +define amdgpu_kernel void @s_test_fabs_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 { %fabs.src = call double @llvm.fabs.f64(double %src) %frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double %fabs.src) - store i32 %frexp.exp, i32 addrspace(1)* %out + store i32 %frexp.exp, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f64: ; GCN: v_frexp_exp_i32_f64_e64 {{v[0-9]+}}, -|{{s\[[0-9]+:[0-9]+\]}}| -define amdgpu_kernel void 
@s_test_fneg_fabs_frexp_exp_f64(i32 addrspace(1)* %out, double %src) #1 { +define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 { %fabs.src = call double @llvm.fabs.f64(double %src) %fneg.fabs.src = fsub double -0.0, %fabs.src %frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double %fneg.fabs.src) - store i32 %frexp.exp, i32 addrspace(1)* %out + store i32 %frexp.exp, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.f16.ll index 722cd44e99fbd2..ba5f20a00bb7e8 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.f16.ll @@ -8,11 +8,11 @@ declare half @llvm.amdgcn.frexp.mant.f16(half %a) ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @frexp_mant_f16( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.amdgcn.frexp.mant.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll index 605dc3db2b989a..0bc50b8a1d94a2 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll @@ -8,55 +8,55 @@ declare double @llvm.amdgcn.frexp.mant.f64(double) #0 ; GCN-LABEL: {{^}}s_test_frexp_mant_f32: ; GCN: v_frexp_mant_f32_e32 {{v[0-9]+}}, {{s[0-9]+}} -define amdgpu_kernel void @s_test_frexp_mant_f32(float addrspace(1)* %out, float %src) #1 { +define amdgpu_kernel void @s_test_frexp_mant_f32(ptr addrspace(1) %out, float %src) #1 { %frexp.mant = call float @llvm.amdgcn.frexp.mant.f32(float %src) - store float %frexp.mant, float addrspace(1)* %out + store float %frexp.mant, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}s_test_fabs_frexp_mant_f32: ; GCN: v_frexp_mant_f32_e64 {{v[0-9]+}}, |{{s[0-9]+}}| -define amdgpu_kernel void @s_test_fabs_frexp_mant_f32(float addrspace(1)* %out, float %src) #1 { +define amdgpu_kernel void @s_test_fabs_frexp_mant_f32(ptr addrspace(1) %out, float %src) #1 { %fabs.src = call float @llvm.fabs.f32(float %src) %frexp.mant = call float @llvm.amdgcn.frexp.mant.f32(float %fabs.src) - store float %frexp.mant, float addrspace(1)* %out + store float %frexp.mant, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_mant_f32: ; GCN: v_frexp_mant_f32_e64 {{v[0-9]+}}, -|{{s[0-9]+}}| -define amdgpu_kernel void @s_test_fneg_fabs_frexp_mant_f32(float addrspace(1)* %out, float %src) #1 { +define amdgpu_kernel void @s_test_fneg_fabs_frexp_mant_f32(ptr addrspace(1) %out, float %src) #1 { %fabs.src = call float @llvm.fabs.f32(float %src) %fneg.fabs.src = fsub float -0.0, %fabs.src %frexp.mant = call float @llvm.amdgcn.frexp.mant.f32(float %fneg.fabs.src) - store float %frexp.mant, float addrspace(1)* %out + store float %frexp.mant, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}s_test_frexp_mant_f64: ; GCN: v_frexp_mant_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}} -define amdgpu_kernel void @s_test_frexp_mant_f64(double addrspace(1)* %out, double %src) #1 { +define amdgpu_kernel void @s_test_frexp_mant_f64(ptr addrspace(1) %out, double %src) #1 { %frexp.mant = call double @llvm.amdgcn.frexp.mant.f64(double %src) - store 
double %frexp.mant, double addrspace(1)* %out + store double %frexp.mant, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}s_test_fabs_frexp_mant_f64: ; GCN: v_frexp_mant_f64_e64 {{v\[[0-9]+:[0-9]+\]}}, |{{s\[[0-9]+:[0-9]+\]}}| -define amdgpu_kernel void @s_test_fabs_frexp_mant_f64(double addrspace(1)* %out, double %src) #1 { +define amdgpu_kernel void @s_test_fabs_frexp_mant_f64(ptr addrspace(1) %out, double %src) #1 { %fabs.src = call double @llvm.fabs.f64(double %src) %frexp.mant = call double @llvm.amdgcn.frexp.mant.f64(double %fabs.src) - store double %frexp.mant, double addrspace(1)* %out + store double %frexp.mant, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_mant_f64: ; GCN: v_frexp_mant_f64_e64 {{v\[[0-9]+:[0-9]+\]}}, -|{{s\[[0-9]+:[0-9]+\]}}| -define amdgpu_kernel void @s_test_fneg_fabs_frexp_mant_f64(double addrspace(1)* %out, double %src) #1 { +define amdgpu_kernel void @s_test_fneg_fabs_frexp_mant_f64(ptr addrspace(1) %out, double %src) #1 { %fabs.src = call double @llvm.fabs.f64(double %src) %fneg.fabs.src = fsub double -0.0, %fabs.src %frexp.mant = call double @llvm.amdgcn.frexp.mant.f64(double %fneg.fabs.src) - store double %frexp.mant, double addrspace(1)* %out + store double %frexp.mant, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.ll index c0bb6f64c9fc28..d7f122d2827b76 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.ll @@ -5,9 +5,9 @@ ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX10 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX900-GISEL -declare void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* nocapture %gptr, i8 addrspace(3)* nocapture %lptr, i32 %size, i32 %offset, i32 %aux) +declare void @llvm.amdgcn.global.load.lds(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux) -define amdgpu_ps void @global_load_lds_dword_vaddr(i8 addrspace(1)* nocapture %gptr, i8 addrspace(3)* nocapture %lptr) { +define amdgpu_ps void @global_load_lds_dword_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) { ; GFX900-LABEL: global_load_lds_dword_vaddr: ; GFX900: ; %bb.0: ; %main_body ; GFX900-NEXT: v_readfirstlane_b32 s0, v2 @@ -46,11 +46,11 @@ define amdgpu_ps void @global_load_lds_dword_vaddr(i8 addrspace(1)* nocapture %g ; GFX900-GISEL-NEXT: global_load_dword v[0:1], off offset:16 glc lds ; GFX900-GISEL-NEXT: s_endpgm main_body: - call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 4, i32 16, i32 1) + call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 16, i32 1) ret void } -define amdgpu_ps void @global_load_lds_dword_saddr(i8 addrspace(1)* nocapture inreg %gptr, i8 addrspace(3)* nocapture %lptr) { +define amdgpu_ps void @global_load_lds_dword_saddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr) { ; GFX900-LABEL: global_load_lds_dword_saddr: ; GFX900: ; %bb.0: ; %main_body ; GFX900-NEXT: v_readfirstlane_b32 s2, v0 @@ -94,11 +94,11 @@ define amdgpu_ps void @global_load_lds_dword_saddr(i8 addrspace(1)* nocapture in ; GFX900-GISEL-NEXT: global_load_dword v0, s[0:1] offset:32 slc lds ; GFX900-GISEL-NEXT: s_endpgm main_body: - call void @llvm.amdgcn.global.load.lds(i8 
addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 4, i32 32, i32 2) + call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 32, i32 2) ret void } -define amdgpu_ps void @global_load_lds_dword_saddr_and_vaddr(i8 addrspace(1)* nocapture inreg %gptr, i8 addrspace(3)* nocapture %lptr, i32 %voffset) { +define amdgpu_ps void @global_load_lds_dword_saddr_and_vaddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr, i32 %voffset) { ; GFX900-LABEL: global_load_lds_dword_saddr_and_vaddr: ; GFX900: ; %bb.0: ; %main_body ; GFX900-NEXT: v_readfirstlane_b32 s2, v0 @@ -138,12 +138,12 @@ define amdgpu_ps void @global_load_lds_dword_saddr_and_vaddr(i8 addrspace(1)* no ; GFX900-GISEL-NEXT: s_endpgm main_body: %voffset.64 = zext i32 %voffset to i64 - %gep = getelementptr i8, i8 addrspace(1)* %gptr, i64 %voffset.64 - call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gep, i8 addrspace(3)* %lptr, i32 4, i32 48, i32 16) + %gep = getelementptr i8, ptr addrspace(1) %gptr, i64 %voffset.64 + call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gep, ptr addrspace(3) %lptr, i32 4, i32 48, i32 16) ret void } -define amdgpu_ps void @global_load_lds_ushort_vaddr(i8 addrspace(1)* nocapture %gptr, i8 addrspace(3)* nocapture %lptr) { +define amdgpu_ps void @global_load_lds_ushort_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) { ; GFX900-LABEL: global_load_lds_ushort_vaddr: ; GFX900: ; %bb.0: ; %main_body ; GFX900-NEXT: v_readfirstlane_b32 s0, v2 @@ -182,11 +182,11 @@ define amdgpu_ps void @global_load_lds_ushort_vaddr(i8 addrspace(1)* nocapture % ; GFX900-GISEL-NEXT: global_load_ushort v[0:1], off lds ; GFX900-GISEL-NEXT: s_endpgm main_body: - call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 2, i32 0, i32 4) + call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 2, i32 0, i32 4) ret void } -define amdgpu_ps void @global_load_lds_ubyte_vaddr(i8 addrspace(1)* nocapture %gptr, i8 addrspace(3)* nocapture %lptr) { +define amdgpu_ps void @global_load_lds_ubyte_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) { ; GFX900-LABEL: global_load_lds_ubyte_vaddr: ; GFX900: ; %bb.0: ; %main_body ; GFX900-NEXT: v_readfirstlane_b32 s0, v2 @@ -225,6 +225,6 @@ define amdgpu_ps void @global_load_lds_ubyte_vaddr(i8 addrspace(1)* nocapture %g ; GFX900-GISEL-NEXT: global_load_ubyte v[0:1], off lds ; GFX900-GISEL-NEXT: s_endpgm main_body: - call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 1, i32 0, i32 0) + call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 1, i32 0, i32 0) ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll index db4032efceabb3..63c2a117f668bc 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll @@ -14,14 +14,14 @@ ; CHECK-LABEL: {{^}}groupstaticsize_test0: ; NOHSA: v_mov_b32_e32 v{{[0-9]+}}, llvm.amdgcn.groupstaticsize@abs32@lo ; HSA: v_mov_b32_e32 v{{[0-9]+}}, 0x800{{$}} -define amdgpu_kernel void @groupstaticsize_test0(float addrspace(1)* %out, i32 addrspace(1)* %lds_size) #0 { +define amdgpu_kernel void @groupstaticsize_test0(ptr addrspace(1) %out, ptr addrspace(1) %lds_size) #0 { %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 64 
%static_lds_size = call i32 @llvm.amdgcn.groupstaticsize() #1 - store i32 %static_lds_size, i32 addrspace(1)* %lds_size, align 4 - %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0 - %val0 = load float, float addrspace(3)* %arrayidx0, align 4 - store float %val0, float addrspace(1)* %out, align 4 + store i32 %static_lds_size, ptr addrspace(1) %lds_size, align 4 + %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0 + %val0 = load float, ptr addrspace(3) %arrayidx0, align 4 + store float %val0, ptr addrspace(1) %out, align 4 ret void } @@ -29,25 +29,25 @@ define amdgpu_kernel void @groupstaticsize_test0(float addrspace(1)* %out, i32 a ; CHECK-LABEL: {{^}}groupstaticsize_test1: ; NOHSA: v_mov_b32_e32 v{{[0-9]+}}, llvm.amdgcn.groupstaticsize@abs32@lo ; HSA: v_mov_b32_e32 v{{[0-9]+}}, 0xc00{{$}} -define amdgpu_kernel void @groupstaticsize_test1(float addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %lds_size) { +define amdgpu_kernel void @groupstaticsize_test1(ptr addrspace(1) %out, i32 %cond, ptr addrspace(1) %lds_size) { entry: %static_lds_size = call i32 @llvm.amdgcn.groupstaticsize() #1 - store i32 %static_lds_size, i32 addrspace(1)* %lds_size, align 4 + store i32 %static_lds_size, ptr addrspace(1) %lds_size, align 4 %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1 %idx.0 = add nsw i32 %tid.x, 64 %tmp = icmp eq i32 %cond, 0 br i1 %tmp, label %if, label %else if: ; preds = %entry - %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0 - %val0 = load float, float addrspace(3)* %arrayidx0, align 4 - store float %val0, float addrspace(1)* %out, align 4 + %arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0 + %val0 = load float, ptr addrspace(3) %arrayidx0, align 4 + store float %val0, ptr addrspace(1) %out, align 4 br label %endif else: ; preds = %entry - %arrayidx1 = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0 - %val1 = load float, float addrspace(3)* %arrayidx1, align 4 - store float %val1, float addrspace(1)* %out, align 4 + %arrayidx1 = getelementptr inbounds [256 x float], ptr addrspace(3) @lds1, i32 0, i32 %idx.0 + %val1 = load float, ptr addrspace(3) %arrayidx1, align 4 + store float %val1, ptr addrspace(1) %out, align 4 br label %endif endif: ; preds = %else, %if @@ -58,11 +58,11 @@ endif: ; preds = %else, %if ; CHECK-LABEL: {{^}}large_groupstaticsize: ; NOHSA: v_mov_b32_e32 v{{[0-9]+}}, llvm.amdgcn.groupstaticsize@abs32@lo ; HSA: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4000{{$}} -define amdgpu_kernel void @large_groupstaticsize(i32 addrspace(1)* %size, i32 %idx) #0 { - %gep = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(3)* @large, i32 0, i32 %idx - store volatile i32 0, i32 addrspace(3)* %gep +define amdgpu_kernel void @large_groupstaticsize(ptr addrspace(1) %size, i32 %idx) #0 { + %gep = getelementptr inbounds [4096 x i32], ptr addrspace(3) @large, i32 0, i32 %idx + store volatile i32 0, ptr addrspace(3) %gep %static_lds_size = call i32 @llvm.amdgcn.groupstaticsize() - store i32 %static_lds_size, i32 addrspace(1)* %size + store i32 %static_lds_size, ptr addrspace(1) %size ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll index 33681c3c96b742..22bdfd06918380 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll +++ 
b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll @@ -18,7 +18,7 @@ declare i32 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0 declare i32 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0 declare i32 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0 -define amdgpu_kernel void @v_icmp_i32_eq(i32 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_eq(ptr addrspace(1) %out, i32 %src) { ; SDAG-GFX11-LABEL: v_icmp_i32_eq: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -70,11 +70,11 @@ define amdgpu_kernel void @v_icmp_i32_eq(i32 addrspace(1)* %out, i32 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32(i32 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32(ptr addrspace(1) %out, i32 %src) { ; SDAG-GFX11-LABEL: v_icmp_i32: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_endpgm @@ -100,11 +100,11 @@ define amdgpu_kernel void @v_icmp_i32(i32 addrspace(1)* %out, i32 %src) { ; GISEL-GFX10-NEXT: global_store_dword v0, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 30) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_ne(i32 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_ne(ptr addrspace(1) %out, i32 %src) { ; SDAG-GFX11-LABEL: v_icmp_i32_ne: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -156,11 +156,11 @@ define amdgpu_kernel void @v_icmp_i32_ne(i32 addrspace(1)* %out, i32 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_ugt(i32 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_ugt(ptr addrspace(1) %out, i32 %src) { ; SDAG-GFX11-LABEL: v_icmp_i32_ugt: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -212,11 +212,11 @@ define amdgpu_kernel void @v_icmp_i32_ugt(i32 addrspace(1)* %out, i32 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_uge(i32 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_uge(ptr addrspace(1) %out, i32 %src) { ; SDAG-GFX11-LABEL: v_icmp_i32_uge: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -268,11 +268,11 @@ define amdgpu_kernel void @v_icmp_i32_uge(i32 addrspace(1)* %out, i32 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_ult(i32 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_ult(ptr addrspace(1) %out, i32 %src) { ; SDAG-GFX11-LABEL: v_icmp_i32_ult: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -324,11 +324,11 @@ define amdgpu_kernel void @v_icmp_i32_ult(i32 addrspace(1)* %out, i32 %src) { ; GISEL-GFX10-NEXT: 
global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_ule(i32 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_ule(ptr addrspace(1) %out, i32 %src) { ; SDAG-GFX11-LABEL: v_icmp_i32_ule: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -380,11 +380,11 @@ define amdgpu_kernel void @v_icmp_i32_ule(i32 addrspace(1)* %out, i32 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_sgt(i32 addrspace(1)* %out, i32 %src) #1 { +define amdgpu_kernel void @v_icmp_i32_sgt(ptr addrspace(1) %out, i32 %src) #1 { ; SDAG-GFX11-LABEL: v_icmp_i32_sgt: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -436,11 +436,11 @@ define amdgpu_kernel void @v_icmp_i32_sgt(i32 addrspace(1)* %out, i32 %src) #1 { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_sge(i32 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_sge(ptr addrspace(1) %out, i32 %src) { ; SDAG-GFX11-LABEL: v_icmp_i32_sge: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -492,11 +492,11 @@ define amdgpu_kernel void @v_icmp_i32_sge(i32 addrspace(1)* %out, i32 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_slt(i32 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_slt(ptr addrspace(1) %out, i32 %src) { ; SDAG-GFX11-LABEL: v_icmp_i32_slt: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -548,11 +548,11 @@ define amdgpu_kernel void @v_icmp_i32_slt(i32 addrspace(1)* %out, i32 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_sle(i32 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_sle(ptr addrspace(1) %out, i32 %src) { ; SDAG-GFX11-LABEL: v_icmp_i32_sle: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -604,11 +604,11 @@ define amdgpu_kernel void @v_icmp_i32_sle(i32 addrspace(1)* %out, i32 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i64_eq(i32 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_i64_eq(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_i64_eq: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -656,11 +656,11 @@ define amdgpu_kernel void @v_icmp_i64_eq(i32 
addrspace(1)* %out, i64 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i64_ne(i32 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_i64_ne(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_i64_ne: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -708,11 +708,11 @@ define amdgpu_kernel void @v_icmp_i64_ne(i32 addrspace(1)* %out, i64 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_u64_ugt(i32 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_u64_ugt(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_u64_ugt: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -760,11 +760,11 @@ define amdgpu_kernel void @v_icmp_u64_ugt(i32 addrspace(1)* %out, i64 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_u64_uge(i32 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_u64_uge(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_u64_uge: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -812,11 +812,11 @@ define amdgpu_kernel void @v_icmp_u64_uge(i32 addrspace(1)* %out, i64 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_u64_ult(i32 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_u64_ult(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_u64_ult: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -864,11 +864,11 @@ define amdgpu_kernel void @v_icmp_u64_ult(i32 addrspace(1)* %out, i64 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_u64_ule(i32 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_u64_ule(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_u64_ule: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -916,11 +916,11 @@ define amdgpu_kernel void @v_icmp_u64_ule(i32 addrspace(1)* %out, i64 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i64_sgt(i32 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_i64_sgt(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_i64_sgt: 
; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -968,11 +968,11 @@ define amdgpu_kernel void @v_icmp_i64_sgt(i32 addrspace(1)* %out, i64 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i64_sge(i32 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_i64_sge(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_i64_sge: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -1020,11 +1020,11 @@ define amdgpu_kernel void @v_icmp_i64_sge(i32 addrspace(1)* %out, i64 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i64_slt(i32 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_i64_slt(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_i64_slt: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -1072,11 +1072,11 @@ define amdgpu_kernel void @v_icmp_i64_slt(i32 addrspace(1)* %out, i64 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i64_sle(i32 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_i64_sle(ptr addrspace(1) %out, i64 %src) { ; SDAG-GFX11-LABEL: v_icmp_i64_sle: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -1124,11 +1124,11 @@ define amdgpu_kernel void @v_icmp_i64_sle(i32 addrspace(1)* %out, i64 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_eq(i32 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_eq(ptr addrspace(1) %out, i16 %src) { ; SDAG-GFX11-LABEL: v_icmp_i16_eq: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -1180,11 +1180,11 @@ define amdgpu_kernel void @v_icmp_i16_eq(i32 addrspace(1)* %out, i16 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 32) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16(i32 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16(ptr addrspace(1) %out, i16 %src) { ; SDAG-GFX11-LABEL: v_icmp_i16: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_endpgm @@ -1210,11 +1210,11 @@ define amdgpu_kernel void @v_icmp_i16(i32 addrspace(1)* %out, i16 %src) { ; GISEL-GFX10-NEXT: global_store_dword v0, v0, s[0:1] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 30) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_ne(i32 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel 
void @v_icmp_i16_ne(ptr addrspace(1) %out, i16 %src) { ; SDAG-GFX11-LABEL: v_icmp_i16_ne: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -1266,11 +1266,11 @@ define amdgpu_kernel void @v_icmp_i16_ne(i32 addrspace(1)* %out, i16 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 33) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_ugt(i32 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_ugt(ptr addrspace(1) %out, i16 %src) { ; SDAG-GFX11-LABEL: v_icmp_i16_ugt: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -1322,11 +1322,11 @@ define amdgpu_kernel void @v_icmp_i16_ugt(i32 addrspace(1)* %out, i16 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 34) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_uge(i32 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_uge(ptr addrspace(1) %out, i16 %src) { ; SDAG-GFX11-LABEL: v_icmp_i16_uge: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -1378,11 +1378,11 @@ define amdgpu_kernel void @v_icmp_i16_uge(i32 addrspace(1)* %out, i16 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 35) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_ult(i32 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_ult(ptr addrspace(1) %out, i16 %src) { ; SDAG-GFX11-LABEL: v_icmp_i16_ult: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -1434,11 +1434,11 @@ define amdgpu_kernel void @v_icmp_i16_ult(i32 addrspace(1)* %out, i16 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 36) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_ule(i32 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_ule(ptr addrspace(1) %out, i16 %src) { ; SDAG-GFX11-LABEL: v_icmp_i16_ule: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -1490,11 +1490,11 @@ define amdgpu_kernel void @v_icmp_i16_ule(i32 addrspace(1)* %out, i16 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 37) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_sgt(i32 addrspace(1)* %out, i16 %src) #1 { +define amdgpu_kernel void @v_icmp_i16_sgt(ptr addrspace(1) %out, i16 %src) #1 { ; SDAG-GFX11-LABEL: v_icmp_i16_sgt: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -1546,11 +1546,11 @@ define amdgpu_kernel void @v_icmp_i16_sgt(i32 addrspace(1)* %out, i16 %src) #1 { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 38) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_sge(i32 
addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_sge(ptr addrspace(1) %out, i16 %src) { ; SDAG-GFX11-LABEL: v_icmp_i16_sge: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -1602,11 +1602,11 @@ define amdgpu_kernel void @v_icmp_i16_sge(i32 addrspace(1)* %out, i16 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 39) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_slt(i32 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_slt(ptr addrspace(1) %out, i16 %src) { ; SDAG-GFX11-LABEL: v_icmp_i16_slt: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -1658,11 +1658,11 @@ define amdgpu_kernel void @v_icmp_i16_slt(i32 addrspace(1)* %out, i16 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 40) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_sle(i32 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_sle(ptr addrspace(1) %out, i16 %src) { ; SDAG-GFX11-LABEL: v_icmp_i16_sle: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_clause 0x1 @@ -1714,11 +1714,11 @@ define amdgpu_kernel void @v_icmp_i16_sle(i32 addrspace(1)* %out, i16 %src) { ; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[2:3] ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 41) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i1_ne0(i32 addrspace(1)* %out, i32 %a, i32 %b) { +define amdgpu_kernel void @v_icmp_i1_ne0(ptr addrspace(1) %out, i32 %a, i32 %b) { ; GFX11-LABEL: v_icmp_i1_ne0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -1751,11 +1751,11 @@ define amdgpu_kernel void @v_icmp_i1_ne0(i32 addrspace(1)* %out, i32 %a, i32 %b) %c1 = icmp ugt i32 %b, 2 %src = and i1 %c0, %c1 %result = call i32 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } -define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i32 addrspace(1)* %out, i32 %src) { +define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(ptr addrspace(1) %out, i32 %src) { ; SDAG-GFX11-LABEL: test_intr_icmp_i32_invalid_cc: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_endpgm @@ -1775,7 +1775,7 @@ define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i32 addrspace(1)* %out, i32 ; GISEL-GFX10-NEXT: global_store_dword v[0:1], v0, off ; GISEL-GFX10-NEXT: s_endpgm %result = call i32 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 9999) - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll index f76973f791b2a8..1f4754ce990b65 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll @@ -21,7 +21,7 @@ declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0 declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0 declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0 -define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_eq(ptr addrspace(1) %out, i32 
%src) { ; GFX11-LABEL: v_icmp_i32_eq: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -78,11 +78,11 @@ define amdgpu_kernel void @v_icmp_i32_eq(i64 addrspace(1)* %out, i32 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 32) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32(i64 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32(ptr addrspace(1) %out, i32 %src) { ; SDAG-GFX11-LABEL: v_icmp_i32: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_endpgm @@ -121,11 +121,11 @@ define amdgpu_kernel void @v_icmp_i32(i64 addrspace(1)* %out, i32 %src) { ; GISEL-GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] ; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 30) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_ne(ptr addrspace(1) %out, i32 %src) { ; GFX11-LABEL: v_icmp_i32_ne: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -182,11 +182,11 @@ define amdgpu_kernel void @v_icmp_i32_ne(i64 addrspace(1)* %out, i32 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 33) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_ugt(i64 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_ugt(ptr addrspace(1) %out, i32 %src) { ; GFX11-LABEL: v_icmp_i32_ugt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -243,11 +243,11 @@ define amdgpu_kernel void @v_icmp_i32_ugt(i64 addrspace(1)* %out, i32 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 34) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_uge(i64 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_uge(ptr addrspace(1) %out, i32 %src) { ; GFX11-LABEL: v_icmp_i32_uge: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -304,11 +304,11 @@ define amdgpu_kernel void @v_icmp_i32_uge(i64 addrspace(1)* %out, i32 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 35) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_ult(i64 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_ult(ptr addrspace(1) %out, i32 %src) { ; GFX11-LABEL: v_icmp_i32_ult: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -365,11 +365,11 @@ define amdgpu_kernel void @v_icmp_i32_ult(i64 addrspace(1)* %out, i32 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 36) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_ule(i64 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_ule(ptr addrspace(1) %out, i32 %src) { ; GFX11-LABEL: v_icmp_i32_ule: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -426,11 
+426,11 @@ define amdgpu_kernel void @v_icmp_i32_ule(i64 addrspace(1)* %out, i32 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 37) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_sgt(i64 addrspace(1)* %out, i32 %src) #1 { +define amdgpu_kernel void @v_icmp_i32_sgt(ptr addrspace(1) %out, i32 %src) #1 { ; GFX11-LABEL: v_icmp_i32_sgt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -487,11 +487,11 @@ define amdgpu_kernel void @v_icmp_i32_sgt(i64 addrspace(1)* %out, i32 %src) #1 { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 38) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_sge(i64 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_sge(ptr addrspace(1) %out, i32 %src) { ; GFX11-LABEL: v_icmp_i32_sge: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -548,11 +548,11 @@ define amdgpu_kernel void @v_icmp_i32_sge(i64 addrspace(1)* %out, i32 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 39) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_slt(i64 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_slt(ptr addrspace(1) %out, i32 %src) { ; GFX11-LABEL: v_icmp_i32_slt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -609,11 +609,11 @@ define amdgpu_kernel void @v_icmp_i32_slt(i64 addrspace(1)* %out, i32 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 40) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_icmp_i32_sle(ptr addrspace(1) %out, i32 %src) { ; GFX11-LABEL: v_icmp_i32_sle: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -670,11 +670,11 @@ define amdgpu_kernel void @v_icmp_i32_sle(i64 addrspace(1)* %out, i32 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 41) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_i64_eq(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_i64_eq: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -744,11 +744,11 @@ define amdgpu_kernel void @v_icmp_i64_eq(i64 addrspace(1)* %out, i64 %src) { ; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 32) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_i64_ne(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_i64_ne: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -818,11 +818,11 @@ define amdgpu_kernel void 
@v_icmp_i64_ne(i64 addrspace(1)* %out, i64 %src) { ; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 33) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_u64_ugt(i64 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_u64_ugt(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_u64_ugt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -892,11 +892,11 @@ define amdgpu_kernel void @v_icmp_u64_ugt(i64 addrspace(1)* %out, i64 %src) { ; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 34) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_u64_uge(i64 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_u64_uge(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_u64_uge: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -966,11 +966,11 @@ define amdgpu_kernel void @v_icmp_u64_uge(i64 addrspace(1)* %out, i64 %src) { ; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 35) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_u64_ult(i64 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_u64_ult(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_u64_ult: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -1040,11 +1040,11 @@ define amdgpu_kernel void @v_icmp_u64_ult(i64 addrspace(1)* %out, i64 %src) { ; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 36) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_u64_ule(i64 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_u64_ule(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_u64_ule: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -1114,11 +1114,11 @@ define amdgpu_kernel void @v_icmp_u64_ule(i64 addrspace(1)* %out, i64 %src) { ; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 37) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i64_sgt(i64 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_i64_sgt(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_i64_sgt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -1188,11 +1188,11 @@ define amdgpu_kernel void @v_icmp_i64_sgt(i64 addrspace(1)* %out, i64 %src) { ; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 38) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i64_sge(i64 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_i64_sge(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_i64_sge: ; GFX11: ; %bb.0: ; 
GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -1262,11 +1262,11 @@ define amdgpu_kernel void @v_icmp_i64_sge(i64 addrspace(1)* %out, i64 %src) { ; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 39) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i64_slt(i64 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_i64_slt(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_i64_slt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -1336,11 +1336,11 @@ define amdgpu_kernel void @v_icmp_i64_slt(i64 addrspace(1)* %out, i64 %src) { ; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 40) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) { +define amdgpu_kernel void @v_icmp_i64_sle(ptr addrspace(1) %out, i64 %src) { ; GFX11-LABEL: v_icmp_i64_sle: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -1410,11 +1410,11 @@ define amdgpu_kernel void @v_icmp_i64_sle(i64 addrspace(1)* %out, i64 %src) { ; GISEL-GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] ; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i64(i64 %src, i64 100, i32 41) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_eq(i64 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_eq(ptr addrspace(1) %out, i16 %src) { ; GFX11-LABEL: v_icmp_i16_eq: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -1471,11 +1471,11 @@ define amdgpu_kernel void @v_icmp_i16_eq(i64 addrspace(1)* %out, i16 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 32) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16(i64 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16(ptr addrspace(1) %out, i16 %src) { ; SDAG-GFX11-LABEL: v_icmp_i16: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_endpgm @@ -1514,11 +1514,11 @@ define amdgpu_kernel void @v_icmp_i16(i64 addrspace(1)* %out, i16 %src) { ; GISEL-GFX9-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] ; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 30) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_ne(i64 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_ne(ptr addrspace(1) %out, i16 %src) { ; GFX11-LABEL: v_icmp_i16_ne: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -1575,11 +1575,11 @@ define amdgpu_kernel void @v_icmp_i16_ne(i64 addrspace(1)* %out, i16 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 33) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_ugt(i64 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_ugt(ptr addrspace(1) %out, i16 %src) { ; GFX11-LABEL: v_icmp_i16_ugt: ; GFX11: ; %bb.0: ; 
GFX11-NEXT: s_clause 0x1 @@ -1636,11 +1636,11 @@ define amdgpu_kernel void @v_icmp_i16_ugt(i64 addrspace(1)* %out, i16 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 34) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_uge(i64 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_uge(ptr addrspace(1) %out, i16 %src) { ; GFX11-LABEL: v_icmp_i16_uge: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -1697,11 +1697,11 @@ define amdgpu_kernel void @v_icmp_i16_uge(i64 addrspace(1)* %out, i16 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 35) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_ult(i64 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_ult(ptr addrspace(1) %out, i16 %src) { ; GFX11-LABEL: v_icmp_i16_ult: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -1758,11 +1758,11 @@ define amdgpu_kernel void @v_icmp_i16_ult(i64 addrspace(1)* %out, i16 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 36) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_ule(i64 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_ule(ptr addrspace(1) %out, i16 %src) { ; GFX11-LABEL: v_icmp_i16_ule: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -1819,11 +1819,11 @@ define amdgpu_kernel void @v_icmp_i16_ule(i64 addrspace(1)* %out, i16 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 37) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_sgt(i64 addrspace(1)* %out, i16 %src) #1 { +define amdgpu_kernel void @v_icmp_i16_sgt(ptr addrspace(1) %out, i16 %src) #1 { ; GFX11-LABEL: v_icmp_i16_sgt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -1880,11 +1880,11 @@ define amdgpu_kernel void @v_icmp_i16_sgt(i64 addrspace(1)* %out, i16 %src) #1 { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 38) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_sge(i64 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_sge(ptr addrspace(1) %out, i16 %src) { ; GFX11-LABEL: v_icmp_i16_sge: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -1941,11 +1941,11 @@ define amdgpu_kernel void @v_icmp_i16_sge(i64 addrspace(1)* %out, i16 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 39) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_slt(i64 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_slt(ptr addrspace(1) %out, i16 %src) { ; GFX11-LABEL: v_icmp_i16_slt: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -2002,11 +2002,11 @@ define amdgpu_kernel void 
@v_icmp_i16_slt(i64 addrspace(1)* %out, i16 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 40) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i16_sle(i64 addrspace(1)* %out, i16 %src) { +define amdgpu_kernel void @v_icmp_i16_sle(ptr addrspace(1) %out, i16 %src) { ; GFX11-LABEL: v_icmp_i16_sle: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -2063,11 +2063,11 @@ define amdgpu_kernel void @v_icmp_i16_sle(i64 addrspace(1)* %out, i16 %src) { ; GISEL-VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GISEL-VI-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i16(i16 %src, i16 100, i32 41) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) { +define amdgpu_kernel void @v_icmp_i1_ne0(ptr addrspace(1) %out, i32 %a, i32 %b) { ; GFX11-LABEL: v_icmp_i1_ne0: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 @@ -2119,11 +2119,11 @@ define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) %c1 = icmp ugt i32 %b, 2 %src = and i1 %c0, %c1 %result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } -define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i64 addrspace(1)* %out, i32 %src) { +define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(ptr addrspace(1) %out, i32 %src) { ; SDAG-GFX11-LABEL: test_intr_icmp_i32_invalid_cc: ; SDAG-GFX11: ; %bb.0: ; SDAG-GFX11-NEXT: s_endpgm @@ -2152,7 +2152,7 @@ define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i64 addrspace(1)* %out, i32 ; GISEL-GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off ; GISEL-GFX9-NEXT: s_endpgm %result = call i64 @llvm.amdgcn.icmp.i32(i32 %src, i32 100, i32 9999) - store i64 %result, i64 addrspace(1)* %out + store i64 %result, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll index 6248bef24b3a8a..1d18e05fad4814 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll @@ -11,7 +11,7 @@ entry: ret void } -define amdgpu_kernel void @test_iglp_opt_mfma_gemm(<32 x float> addrspace(3)* noalias %in, <32 x float> addrspace(3)* noalias %out) #0 { +define amdgpu_kernel void @test_iglp_opt_mfma_gemm(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 { ; GCN-LABEL: test_iglp_opt_mfma_gemm: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -122,31 +122,31 @@ define amdgpu_kernel void @test_iglp_opt_mfma_gemm(<32 x float> addrspace(3)* no entry: call void @llvm.amdgcn.iglp.opt(i32 0) %idx = call i32 @llvm.amdgcn.workitem.id.x() - %load.0.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %in, i32 %idx - %load.0 = load <32 x float>, <32 x float> addrspace(3)* %load.0.addr - %load.1.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.0.addr, i32 64 - %load.1 = load <32 x float>, <32 x float> addrspace(3)* %load.1.addr - %load.2.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.1.addr, i32 128 - %load.2 = load <32 x float>, <32 x float> addrspace(3)* %load.2.addr - %load.3.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.2.addr, i32 192 - %load.3 = load 
<32 x float>, <32 x float> addrspace(3)* %load.3.addr - %load.4.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.3.addr, i32 256 - %load.4 = load <32 x float>, <32 x float> addrspace(3)* %load.4.addr + %load.0.addr = getelementptr <32 x float>, ptr addrspace(3) %in, i32 %idx + %load.0 = load <32 x float>, ptr addrspace(3) %load.0.addr + %load.1.addr = getelementptr <32 x float>, ptr addrspace(3) %load.0.addr, i32 64 + %load.1 = load <32 x float>, ptr addrspace(3) %load.1.addr + %load.2.addr = getelementptr <32 x float>, ptr addrspace(3) %load.1.addr, i32 128 + %load.2 = load <32 x float>, ptr addrspace(3) %load.2.addr + %load.3.addr = getelementptr <32 x float>, ptr addrspace(3) %load.2.addr, i32 192 + %load.3 = load <32 x float>, ptr addrspace(3) %load.3.addr + %load.4.addr = getelementptr <32 x float>, ptr addrspace(3) %load.3.addr, i32 256 + %load.4 = load <32 x float>, ptr addrspace(3) %load.4.addr %mai.0 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.0, i32 0, i32 0, i32 0) %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.1, i32 0, i32 0, i32 0) %mai.2 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.2, i32 0, i32 0, i32 0) %mai.3 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.3, i32 0, i32 0, i32 0) %mai.4 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.4, i32 0, i32 0, i32 0) - %store.0.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 %idx - store <32 x float> %mai.0, <32 x float> addrspace(3)* %store.0.addr - %store.1.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 64 - store <32 x float> %mai.1, <32 x float> addrspace(3)* %store.1.addr - %store.2.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 128 - store <32 x float> %mai.2, <32 x float> addrspace(3)* %store.2.addr - %store.3.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 192 - store <32 x float> %mai.3, <32 x float> addrspace(3)* %store.3.addr - %store.4.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 256 - store <32 x float> %mai.4, <32 x float> addrspace(3)* %store.4.addr + %store.0.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 %idx + store <32 x float> %mai.0, ptr addrspace(3) %store.0.addr + %store.1.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 64 + store <32 x float> %mai.1, ptr addrspace(3) %store.1.addr + %store.2.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 128 + store <32 x float> %mai.2, ptr addrspace(3) %store.2.addr + %store.3.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 192 + store <32 x float> %mai.3, ptr addrspace(3) %store.3.addr + %store.4.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 256 + store <32 x float> %mai.4, ptr addrspace(3) %store.4.addr ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll index 15269a88e61c71..5865f86ca6d9a9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll @@ -11,12 +11,12 @@ main_body: ; GCN-LABEL: {{^}}load_1d_lwe: ; GCN: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf unorm lwe{{$}} -define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> 
inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) { +define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) { main_body: %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 2, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 %v.err = extractvalue {<4 x float>, i32} %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } @@ -46,12 +46,12 @@ main_body: ; GCN-LABEL: {{^}}load_cube_lwe: ; GCN: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}} -define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) { +define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) { main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 %v.err = extractvalue {<4 x float>, i32} %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } @@ -73,12 +73,12 @@ main_body: ; GCN-LABEL: {{^}}load_2darray_lwe: ; GCN: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}} -define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) { +define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) { main_body: %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0) %v.vec = extractvalue {<4 x float>, i32} %v, 0 %v.err = extractvalue {<4 x float>, i32} %v, 1 - store i32 %v.err, i32 addrspace(1)* %out, align 4 + store i32 %v.err, ptr addrspace(1) %out, align 4 ret <4 x float> %v.vec } @@ -269,13 +269,13 @@ main_body: ; GCN-LABEL: image_load_mmo ; GCN: image_load v1, v[{{[0-9:]+}}], s[0:7] dmask:0x1 unorm -define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 { - store float 0.000000e+00, float addrspace(3)* %lds +define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, ptr addrspace(3) %lds, <2 x i32> %c) #0 { + store float 0.000000e+00, ptr addrspace(3) %lds %c0 = extractelement <2 x i32> %c, i32 0 %c1 = extractelement <2 x i32> %c, i32 1 %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0) - %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4 - store float 0.000000e+00, float addrspace(3)* %tmp2 + %tmp2 = getelementptr float, ptr addrspace(3) %lds, i32 4 + store float 0.000000e+00, ptr addrspace(3) %tmp2 ret float %tex } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll index 060c115ef78f08..51d41c7234b3b4 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll @@ -41,7 +41,7 @@ main_body: ret <4 x float> %v } -define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) { +define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) { ; VERDE-LABEL: load_1d_tfe: ; VERDE: ; %bb.0: ; %main_body ; VERDE-NEXT: 
v_mov_b32_e32 v5, v0
@@ -141,11 +141,11 @@ main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

-define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
; VERDE-LABEL: load_1d_lwe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -245,7 +245,7 @@ main_body:
  %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

@@ -284,7 +284,7 @@ main_body:
  ret <4 x float> %v
}

-define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) {
+define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t) {
; VERDE-LABEL: load_2d_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -389,7 +389,7 @@ main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

@@ -428,7 +428,7 @@ main_body:
  ret <4 x float> %v
}

-define amdgpu_ps <4 x float> @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %r) {
+define amdgpu_ps <4 x float> @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %r) {
; VERDE-LABEL: load_3d_tfe_lwe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -537,7 +537,7 @@ main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

@@ -576,7 +576,7 @@ main_body:
  ret <4 x float> %v
}

-define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
+define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) {
; VERDE-LABEL: load_cube_lwe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -685,7 +685,7 @@ main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

@@ -724,7 +724,7 @@ main_body:
  ret <4 x float> %v
}

-define amdgpu_ps <4 x float> @load_1darray_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %slice) {
+define amdgpu_ps <4 x float> @load_1darray_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %slice) {
; VERDE-LABEL: load_1darray_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -829,7 +829,7 @@ main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

@@ -868,7 +868,7 @@ main_body:
  ret <4 x float> %v
}

-define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
+define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) {
; VERDE-LABEL: load_2darray_lwe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -977,7 +977,7 @@ main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

@@ -1016,7 +1016,7 @@ main_body:
  ret <4 x float> %v
}

-define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
+define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
; VERDE-LABEL: load_2dmsaa_both:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -1125,7 +1125,7 @@ main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

@@ -1164,7 +1164,7 @@ main_body:
  ret <4 x float> %v
}

-define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; VERDE-LABEL: load_2darraymsaa_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -1278,7 +1278,7 @@ main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

@@ -1317,7 +1317,7 @@ main_body:
  ret <4 x float> %v
}

-define amdgpu_ps <4 x float> @load_mip_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %mip) {
+define amdgpu_ps <4 x float> @load_mip_1d_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %mip) {
; VERDE-LABEL: load_mip_1d_lwe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -1422,7 +1422,7 @@ main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

@@ -1461,7 +1461,7 @@ main_body:
  ret <4 x float> %v
}

-define amdgpu_ps <4 x float> @load_mip_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %mip) {
+define amdgpu_ps <4 x float> @load_mip_2d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %mip) {
; VERDE-LABEL: load_mip_2d_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
@@ -1570,7 +1570,7 @@ main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

@@ -1880,7 +1880,7 @@ main_body:
  ret float %vv
}

-define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask3(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask3(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
; VERDE-LABEL: load_1d_tfe_V4_dmask3:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v4, v0
@@ -1972,11 +1972,11 @@ main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

-define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask2(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask2(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
; VERDE-LABEL: load_1d_tfe_V4_dmask2:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v3, v0
@@ -2061,11 +2061,11 @@ main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

-define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask1(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
; VERDE-LABEL: load_1d_tfe_V4_dmask1:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v2, v0
@@ -2142,11 +2142,11 @@ main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

-define amdgpu_ps <2 x float> @load_1d_tfe_V2_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
+define amdgpu_ps <2 x float> @load_1d_tfe_V2_dmask1(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
; VERDE-LABEL: load_1d_tfe_V2_dmask1:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v2, v0
@@ -2223,7 +2223,7 @@ main_body:
  %v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<2 x float>, i32} %v, 0
  %v.err = extractvalue {<2 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <2 x float> %v.vec
}

@@ -3779,7 +3779,7 @@ main_body:
  ret void
}

-define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
+define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, ptr addrspace(3) %lds, <2 x i32> %c) #0 {
; VERDE-LABEL: image_load_mmo:
; VERDE: ; %bb.0:
; VERDE-NEXT: image_load v1, v[1:2], s[0:7] dmask:0x1 unorm
@@ -3843,12 +3843,12 @@ define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)
; GFX11-NEXT: v_mov_b32_e32 v0, v1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
- store float 0.000000e+00, float addrspace(3)* %lds
+ store float 0.000000e+00, ptr addrspace(3) %lds
  %c0 = extractelement <2 x i32> %c, i32 0
  %c1 = extractelement <2 x i32> %c, i32 1
  %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
- %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
- store float 0.000000e+00, float addrspace(3)* %tmp2
+ %tmp2 = getelementptr float, ptr addrspace(3) %lds, i32 4
+ store float 0.000000e+00, ptr addrspace(3) %tmp2
  ret float %tex
}

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
index a8f395f841a6de..ffef2787468bf4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
@@ -10,12 +10,12 @@ main_body:
; GCN-LABEL: {{^}}load_2dmsaa_both:
; GFX11: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe
;
-define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
+define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32i32.i32(i32 2, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

@@ -29,12 +29,12 @@ main_body:
; GCN-LABEL: {{^}}load_2darraymsaa_tfe:
; GFX11: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe
;
-define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32i32.i32(i32 8, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

@@ -72,12 +72,12 @@ main_body:
; GCN-LABEL: {{^}}load_2dmsaa_tfe_d16:
; GFX11: image_msaa_load v[0:2], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe d16
;
-define amdgpu_ps <4 x half> @load_2dmsaa_tfe_d16(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
+define amdgpu_ps <4 x half> @load_2dmsaa_tfe_d16(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
  %v = call {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16i32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x half>, i32} %v, 0
  %v.err = extractvalue {<4 x half>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x half> %v.vec
}

@@ -91,12 +91,12 @@ main_body:
; GCN-LABEL: {{^}}load_2darraymsaa_tfe_d16:
; GFX11: image_msaa_load v[0:2], v[0:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe d16
;
-define amdgpu_ps <4 x half> @load_2darraymsaa_tfe_d16(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+define amdgpu_ps <4 x half> @load_2darraymsaa_tfe_d16(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
  %v = call {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16i32.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x half>, i32} %v, 0
  %v.err = extractvalue {<4 x half>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x half> %v.vec
}

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll
index a90fc6299321b1..934dc96ab790fb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll
@@ -10,12 +10,12 @@ main_body:
; GCN-LABEL: {{^}}load_2dmsaa_both:
; GFX10: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe
;
-define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
+define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

@@ -29,56 +29,56 @@ main_body:
; GCN-LABEL: {{^}}load_2darraymsaa_tfe:
; GFX10: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe
;
-define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
+define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2darraymsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_2dmsaa_tfe_V4_dmask3:
; GFX10: image_msaa_load v[0:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe
;
-define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask3(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
+define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask3(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32 7, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_2dmsaa_tfe_V4_dmask2:
; GFX10: image_msaa_load v[0:2], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe
;
-define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask2(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
+define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask2(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32 6, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_2dmsaa_tfe_V4_dmask1:
; GFX10: image_msaa_load v[0:1], [v4, v3, v2], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe
;
-define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
+define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask1(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32 8, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_2dmsaa_tfe_V2_dmask1:
; GFX10: image_msaa_load v[0:1], [v4, v3, v2], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe
;
-define amdgpu_ps <2 x float> @load_2dmsaa_tfe_V2_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
+define amdgpu_ps <2 x float> @load_2dmsaa_tfe_V2_dmask1(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
  %v = call {<2 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32i32.i32(i32 8, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue {<2 x float>, i32} %v, 0
  %v.err = extractvalue {<2 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <2 x float> %v.vec
}

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
index d61fb52e1269f6..35e2f017f51a8e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
@@ -46,7 +46,7 @@ main_body:
  ret half %tex
}

-define amdgpu_ps half @image_sample_2d_f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, i32 addrspace(1)* inreg %out) {
+define amdgpu_ps half @image_sample_2d_f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, ptr addrspace(1) inreg %out) {
; TONGA-LABEL: image_sample_2d_f16_tfe:
; TONGA: ; %bb.0: ; %main_body
; TONGA-NEXT: s_mov_b64 s[14:15], exec
@@ -129,7 +129,7 @@ main_body:
  %tex = call {half,i32} @llvm.amdgcn.image.sample.2d.f16i32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
  %tex.vec = extractvalue {half, i32} %tex, 0
  %tex.err = extractvalue {half, i32} %tex, 1
- store i32 %tex.err, i32 addrspace(1)* %out, align 4
+ store i32 %tex.err, ptr addrspace(1) %out, align 4
  ret half %tex.vec
}

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
index e712a18b74df35..28d859b1222c47 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll
@@ -13,12 +13,12 @@ main_body:
; GFX90A-LABEL: {{^}}sample_1d_lwe:
; GFX90A-NOT: s_wqm_b64
; GFX90A: image_sample v[{{[0-9:]+}}], v{{[0-9]+}}, s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf lwe
-define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
+define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
index 34fa7e9d81826d..90c357b3ea9975 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
@@ -36,7 +36,7 @@ main_body:
  ret <4 x float> %v
}

-define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
+define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
; VERDE-LABEL: sample_1d_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[14:15], exec
@@ -122,11 +122,11 @@ main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

-define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
+define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
; VERDE-LABEL: sample_1d_tfe_adjust_writemask_1:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[12:13], exec
@@ -499,7 +499,7 @@ main_body:
  ret <4 x float> %res
}

-define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
+define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
; VERDE-LABEL: sample_1d_lwe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: s_mov_b64 s[14:15], exec
@@ -585,7 +585,7 @@ main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

@@ -1588,7 +1588,7 @@ main_body:
  ret float %v
}

-define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, i32 addrspace(1)* inreg %out) {
+define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, ptr addrspace(1) inreg %out) {
; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v9, 0
@@ -1643,7 +1643,7 @@ main_body:
  %v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
  %v.vec = extractvalue {float, i32} %v, 0
  %v.err = extractvalue {float, i32} %v, 1
- store i32 %v.err, i32 addrspace(1)* %out, align 4
+ store i32 %v.err, ptr addrspace(1) %out, align 4
  ret float %v.vec
}

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.hsa.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.hsa.ll
index a084fa08b804b8..77f57b0322711b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.hsa.ll
@@ -1,24 +1,22 @@
; RUN: not llc -mtriple=amdgcn-amd-amdhsa < %s 2>&1 | FileCheck -check-prefix=ERROR %s

; ERROR: in function test_kernel{{.*}}: non-hsa intrinsic with hsa target
-define amdgpu_kernel void @test_kernel(i32 addrspace(1)* %out) #1 {
- %implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
- %header_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
- %value = load i32, i32 addrspace(4)* %header_ptr
- store i32 %value, i32 addrspace(1)* %out
+define amdgpu_kernel void @test_kernel(ptr addrspace(1) %out) #1 {
+ %implicit_buffer_ptr = call ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr()
+ %value = load i32, ptr addrspace(4) %implicit_buffer_ptr
+ store i32 %value, ptr addrspace(1) %out
  ret void
}

; ERROR: in function test_func{{.*}}: non-hsa intrinsic with hsa target
-define void @test_func(i32 addrspace(1)* %out) #1 {
- %implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
- %header_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
- %value = load i32, i32 addrspace(4)* %header_ptr
- store i32 %value, i32 addrspace(1)* %out
+define void @test_func(ptr addrspace(1) %out) #1 {
+ %implicit_buffer_ptr = call ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr()
+ %value = load i32, ptr addrspace(4) %implicit_buffer_ptr
+ store i32 %value, ptr addrspace(1) %out
  ret void
}

-declare i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() #0
+declare ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() #0

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll
index 6b22d92e367d93..e9d9b669408ac5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicit.buffer.ptr.ll
@@ -9,10 +9,9 @@
; GCN-NEXT: ; return
define amdgpu_ps i32 @test_ps() #1 {
  %alloca = alloca i32, addrspace(5)
- store volatile i32 0, i32 addrspace(5)* %alloca
- %implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
- %buffer_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
- %value = load volatile i32, i32 addrspace(4)* %buffer_ptr
+ store volatile i32 0, ptr addrspace(5) %alloca
+ %implicit_buffer_ptr = call ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr()
+ %value = load volatile i32, ptr addrspace(4) %implicit_buffer_ptr
  ret i32 %value
}

@@ -22,14 +21,13 @@ define amdgpu_ps i32 @test_ps() #1 {
; GCN: s_load_dword s0, s[0:1], 0x0
define amdgpu_cs i32 @test_cs() #1 {
  %alloca = alloca i32, addrspace(5)
- store volatile i32 0, i32 addrspace(5)* %alloca
- %implicit_buffer_ptr = call i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr()
- %buffer_ptr = bitcast i8 addrspace(4)* %implicit_buffer_ptr to i32 addrspace(4)*
- %value = load volatile i32, i32 addrspace(4)* %buffer_ptr
+ store volatile i32 0, ptr addrspace(5) %alloca
+ %implicit_buffer_ptr = call ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr()
+ %value = load volatile i32, ptr addrspace(4) %implicit_buffer_ptr
  ret i32 %value
}

-declare i8 addrspace(4)* @llvm.amdgcn.implicit.buffer.ptr() #0
+declare ptr addrspace(4) @llvm.amdgcn.implicit.buffer.ptr() #0

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind }

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
index 92261d700446b1..c085a2d17f55f5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -15,9 +15,8 @@
; COV5: .amdhsa_kernarg_size 256
define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
- %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
- %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
- %load = load volatile i32, i32 addrspace(4)* %cast
+ %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

@@ -37,9 +36,8 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty() #0 {
; COV5: .amdhsa_kernarg_size 0
define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
- %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
- %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
- %load = load volatile i32, i32 addrspace(4)* %cast
+ %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

@@ -57,9 +55,8 @@ define amdgpu_kernel void @kernel_implicitarg_ptr_empty_0implicit() #3 {
; COV5: .amdhsa_kernarg_size 48
define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
- %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
- %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
- %load = load volatile i32, i32 addrspace(4)* %cast
+ %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

@@ -77,9 +74,8 @@ define amdgpu_kernel void @opencl_kernel_implicitarg_ptr_empty() #1 {
; COV5: .amdhsa_kernarg_size 368
define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
- %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
- %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
- %load = load volatile i32, i32 addrspace(4)* %cast
+ %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

@@ -97,9 +93,8 @@ define amdgpu_kernel void @kernel_implicitarg_ptr([112 x i8]) #0 {
; COV5: .amdhsa_kernarg_size 160
define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
- %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
- %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
- %load = load volatile i32, i32 addrspace(4)* %cast
+ %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

@@ -109,9 +104,8 @@ define amdgpu_kernel void @opencl_kernel_implicitarg_ptr([112 x i8]) #1 {
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @func_implicitarg_ptr() #0 {
- %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
- %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
- %load = load volatile i32, i32 addrspace(4)* %cast
+ %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

@@ -121,9 +115,8 @@ define void @func_implicitarg_ptr() #0 {
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @opencl_func_implicitarg_ptr() #0 {
- %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
- %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
- %load = load volatile i32, i32 addrspace(4)* %cast
+ %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

@@ -249,12 +242,10 @@ define void @opencl_func_call_implicitarg_ptr_func() #0 {
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
; GCN: s_waitcnt lgkmcnt(0)
define void @func_kernarg_implicitarg_ptr() #0 {
- %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
- %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
- %cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
- %cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
- %load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
- %load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
+ %kernarg.segment.ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+ %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %load0 = load volatile i32, ptr addrspace(4) %kernarg.segment.ptr
+ %load1 = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

@@ -265,12 +256,10 @@ define void @func_kernarg_implicitarg_ptr() #0 {
; GCN: s_load_dword s{{[0-9]+}}, s[8:9], 0x0
; GCN: s_waitcnt lgkmcnt(0)
define void @opencl_func_kernarg_implicitarg_ptr() #0 {
- %kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
- %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
- %cast.kernarg.segment.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
- %cast.implicitarg = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
- %load0 = load volatile i32, i32 addrspace(4)* %cast.kernarg.segment.ptr
- %load1 = load volatile i32, i32 addrspace(4)* %cast.implicitarg
+ %kernarg.segment.ptr = call ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+ %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %load0 = load volatile i32, ptr addrspace(4) %kernarg.segment.ptr
+ %load1 = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

@@ -291,9 +280,8 @@ define amdgpu_kernel void @kernel_call_kernarg_implicitarg_ptr_func([112 x i8])
; COV5: .amdhsa_kernarg_size 120
define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>, i32) #1 {
- %implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
- %cast = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
- %load = load volatile i32, i32 addrspace(4)* %cast
+ %implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %load = load volatile i32, ptr addrspace(4) %implicitarg.ptr
  ret void
}

@@ -396,8 +384,8 @@ define amdgpu_kernel void @kernel_implicitarg_no_struct_align_padding(<16 x i32>
; COV5-NEXT: .kernarg_segment_size: 120
; COV5-LABEL: .name: kernel_implicitarg_no_struct_align_padding

-declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #2
-declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #2
+declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2
+declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #2

attributes #0 = { nounwind noinline }
attributes #1 = { nounwind noinline "amdgpu-implicitarg-num-bytes"="48" }

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
index f7f1f96f4d3da1..3920356401d315 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll
@@ -99,20 +99,20 @@ main_body:
  %array1 = alloca [20 x i32], align 16, addrspace(5)
  call void @llvm.amdgcn.init.exec(i64 -1)

- %ptr0 = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(5)* %array0, i32 0, i32 1
- store i32 %a, i32 addrspace(5)* %ptr0, align 4
+ %ptr0 = getelementptr inbounds [1024 x i32], ptr addrspace(5) %array0, i32 0, i32 1
+ store i32 %a, ptr addrspace(5) %ptr0, align 4

- %ptr1 = getelementptr inbounds [20 x i32], [20 x i32] addrspace(5)* %array1, i32 0, i32 1
- store i32 %a, i32 addrspace(5)* %ptr1, align 4
+ %ptr1 = getelementptr inbounds [20 x i32], ptr addrspace(5) %array1, i32 0, i32 1
+ store i32 %a, ptr addrspace(5) %ptr1, align 4

- %ptr2 = getelementptr inbounds [20 x i32], [20 x i32] addrspace(5)* %array1, i32 0, i32 2
- store i32 %b, i32 addrspace(5)* %ptr2, align 4
+ %ptr2 = getelementptr inbounds [20 x i32], ptr addrspace(5) %array1, i32 0, i32 2
+ store i32 %b, ptr addrspace(5) %ptr2, align 4

- %ptr3 = getelementptr inbounds [20 x i32], [20 x i32] addrspace(5)* %array1, i32 0, i32 %b
- %v3 = load i32, i32 addrspace(5)* %ptr3, align 4
+ %ptr3 = getelementptr inbounds [20 x i32], ptr addrspace(5) %array1, i32 0, i32 %b
+ %v3 = load i32, ptr addrspace(5) %ptr3, align 4

- %ptr4 = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(5)* %array0, i32 0, i32 %b
- %v4 = load i32, i32 addrspace(5)* %ptr4, align 4
+ %ptr4 = getelementptr inbounds [1024 x i32], ptr addrspace(5) %array0, i32 0, i32 %b
+ %v4 = load i32, ptr addrspace(5) %ptr4, align 4

  %v5 = add i32 %v3, %v4
  %v = bitcast i32 %v5 to float
@@ -133,20 +133,20 @@ main_body:
  %array1 = alloca [20 x i32], align 16, addrspace(5)
  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 8)

- %ptr0 = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(5)* %array0, i32 0, i32 1
- store i32 %a, i32 addrspace(5)* %ptr0, align 4
+ %ptr0 = getelementptr inbounds [1024 x i32], ptr addrspace(5) %array0, i32 0, i32 1
+ store i32 %a, ptr addrspace(5) %ptr0, align 4

- %ptr1 = getelementptr inbounds [20 x i32], [20 x i32] addrspace(5)* %array1, i32 0, i32 1
- store i32 %a, i32 addrspace(5)* %ptr1, align 4
+ %ptr1 = getelementptr inbounds [20 x i32], ptr addrspace(5) %array1, i32 0, i32 1
+ store i32 %a, ptr addrspace(5) %ptr1, align 4

- %ptr2 = getelementptr inbounds [20 x i32], [20 x i32] addrspace(5)* %array1, i32 0, i32 2
- store i32 %b, i32 addrspace(5)* %ptr2, align 4
+ %ptr2 = getelementptr inbounds [20 x i32], ptr addrspace(5) %array1, i32 0, i32 2
+ store i32 %b, ptr addrspace(5) %ptr2, align 4

- %ptr3 = getelementptr inbounds [20 x i32], [20 x i32] addrspace(5)* %array1, i32 0, i32 %b
- %v3 = load i32, i32 addrspace(5)* %ptr3, align 4
+ %ptr3 = getelementptr inbounds [20 x i32], ptr addrspace(5) %array1, i32 0, i32 %b
+ %v3 = load i32, ptr addrspace(5) %ptr3, align 4

- %ptr4 = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(5)* %array0, i32 0, i32 %b
- %v4 = load i32, i32 addrspace(5)* %ptr4, align 4
+ %ptr4 = getelementptr inbounds [1024 x i32], ptr addrspace(5) %array0, i32 0, i32 %b
+ %v4 = load i32, ptr addrspace(5) %ptr4, align 4

  %v5 = add i32 %v3, %v4
  %v = bitcast i32 %v5 to float
@@ -178,20 +178,20 @@ if:

endif:
  call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 8)

- %ptr0 = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(5)* %array0, i32 0, i32 1
- store i32 %a, i32 addrspace(5)* %ptr0, align 4
+ %ptr0 = getelementptr inbounds [1024 x i32], ptr addrspace(5) %array0, i32 0, i32 1
+ store i32 %a, ptr addrspace(5) %ptr0, align 4

- %ptr1 = getelementptr inbounds [20 x i32], [20 x i32] addrspace(5)* %array1, i32 0, i32 1
- store i32 %a, i32 addrspace(5)* %ptr1, align 4
+ %ptr1 = getelementptr inbounds [20 x i32], ptr addrspace(5) %array1, i32 0, i32 1
+ store i32 %a, ptr addrspace(5) %ptr1, align 4

- %ptr2 = getelementptr inbounds [20 x i32], [20 x i32] addrspace(5)* %array1, i32 0, i32 2
- store i32 %b, i32 addrspace(5)* %ptr2, align 4
+ %ptr2 = getelementptr inbounds [20 x i32], ptr addrspace(5) %array1, i32 0, i32 2
+ store i32 %b, ptr addrspace(5) %ptr2, align 4

- %ptr3 = getelementptr inbounds [20 x i32], [20 x i32] addrspace(5)* %array1, i32 0, i32 %b
- %v3 = load i32, i32 addrspace(5)* %ptr3, align 4
+ %ptr3 = getelementptr inbounds [20 x i32], ptr addrspace(5) %array1, i32 0, i32 %b
+ %v3 = load i32, ptr addrspace(5) %ptr3, align 4

- %ptr4 = getelementptr inbounds [1024 x i32], [1024 x i32] addrspace(5)* %array0, i32 0, i32 %b
- %v4 = load i32, i32 addrspace(5)* %ptr4, align 4
+ %ptr4 = getelementptr inbounds [1024 x i32], ptr addrspace(5) %array0, i32 0, i32 %b
+ %v4 = load i32, ptr addrspace(5) %ptr4, align 4

  %v5 = add i32 %v3, %v4
  %v6 = add i32 %v5, %count

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll
index cdd61396f2cd23..bd9d5438e953f8 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.inreg.ll
@@ -73,7 +73,7 @@ main_body:
  ret void
}

-define amdgpu_ps void @v_interp_f32_many_vm(float addrspace(1)* %ptr, i32 inreg %m0) #0 {
+define amdgpu_ps void @v_interp_f32_many_vm(ptr addrspace(1) %ptr, i32 inreg %m0) #0 {
; GCN-LABEL: v_interp_f32_many_vm:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: global_load_b64 v[0:1], v[0:1], off offset:4
@@ -99,10 +99,10 @@ define amdgpu_ps void @v_interp_f32_many_vm(float addrspace(1)* %ptr, i32 inreg
; GCN-NEXT: exp mrt0 v6, v7, v8, v0 done
; GCN-NEXT: s_endpgm
main_body:
- %i.ptr = getelementptr float, float addrspace(1)* %ptr, i32 1
- %i = load float, float addrspace(1)* %i.ptr, align 4
- %j.ptr = getelementptr float, float addrspace(1)* %ptr, i32 2
- %j = load float, float addrspace(1)* %j.ptr, align 4
+ %i.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 1
+ %i = load float, ptr addrspace(1) %i.ptr, align 4
+ %j.ptr = getelementptr float, ptr addrspace(1) %ptr, i32 2
+ %j = load float, ptr addrspace(1) %j.ptr, align 4
  %p0 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 0, i32 %m0)
  %p1 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 1, i32 %m0)
  %p2 = call float @llvm.amdgcn.lds.param.load(i32 0, i32 2, i32 %m0)

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
index 2ba23b72f92c0c..fd27d7e49b6677 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.interp.ll
@@ -10,7 +10,7 @@
; GCN-DAG: v_interp_p1_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_p2_f32{{(_e32)*}} v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_mov_f32{{(_e32)*}} v{{[0-9]+}}, p0, attr0.x{{$}}
-define amdgpu_ps void @v_interp(<16 x i8> addrspace(4)* inreg %arg, <16 x i8> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg %arg2, i32 inreg %arg3, <2 x float> %arg4) #0 {
+define amdgpu_ps void @v_interp(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3, <2 x float> %arg4) #0 {
main_body:
  %i = extractelement <2 x float> %arg4, i32 0
  %j = extractelement <2 x float> %arg4, i32 1
@@ -54,18 +54,18 @@ bb:
  %p0_10 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 64, i32 256)
  %p0_11 = call float @llvm.amdgcn.interp.p1(float %i, i32 4, i32 64, i32 256)

- store volatile float %p0_0, float addrspace(1)* undef
- store volatile float %p0_1, float addrspace(1)* undef
- store volatile float %p0_2, float addrspace(1)* undef
- store volatile float %p0_3, float addrspace(1)* undef
- store volatile float %p0_4, float addrspace(1)* undef
- store volatile float %p0_5, float addrspace(1)* undef
- store volatile float %p0_6, float addrspace(1)* undef
- store volatile float %p0_7, float addrspace(1)* undef
- store volatile float %p0_8, float addrspace(1)* undef
- store volatile float %p0_9, float addrspace(1)* undef
- store volatile float %p0_10, float addrspace(1)* undef
- store volatile float %p0_11, float addrspace(1)* undef
+ store volatile float %p0_0, ptr addrspace(1) undef
+ store volatile float %p0_1, ptr addrspace(1) undef
+ store volatile float %p0_2, ptr addrspace(1) undef
+ store volatile float %p0_3, ptr addrspace(1) undef
+ store volatile float %p0_4, ptr addrspace(1) undef
+ store volatile float %p0_5, ptr addrspace(1) undef
+ store volatile float %p0_6, ptr addrspace(1) undef
+ store volatile float %p0_7, ptr addrspace(1) undef
+ store volatile float %p0_8, ptr addrspace(1) undef
+ store volatile float %p0_9, ptr addrspace(1) undef
+ store volatile float %p0_10, ptr addrspace(1) undef
+ store volatile float %p0_11, ptr addrspace(1) undef
  ret void
}

@@ -93,15 +93,15 @@ bb:
  %p2_7 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 64, i32 256)
  %p2_8 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 4, i32 64, i32 256)

- store volatile float %p2_0, float addrspace(1)* undef
- store volatile float %p2_1, float addrspace(1)* undef
- store volatile float %p2_2, float addrspace(1)* undef
- store volatile float %p2_3, float addrspace(1)* undef
- store volatile float %p2_4, float addrspace(1)* undef
- store volatile float %p2_5, float addrspace(1)* undef
- store volatile float %p2_6, float addrspace(1)* undef
- store volatile float %p2_7, float addrspace(1)* undef
- store volatile float %p2_8, float addrspace(1)* undef
+ store volatile float %p2_0, ptr addrspace(1) undef
+ store volatile float %p2_1, ptr addrspace(1) undef
+ store volatile float %p2_2, ptr addrspace(1) undef
+ store volatile float %p2_3, ptr addrspace(1) undef
+ store volatile float %p2_4, ptr addrspace(1) undef
+ store volatile float %p2_5, ptr addrspace(1) undef
+ store volatile float %p2_6, ptr addrspace(1) undef
+ store volatile float %p2_7, ptr addrspace(1) undef
+ store volatile float %p2_8, ptr addrspace(1) undef
  ret void
}

@@ -140,21 +140,21 @@ bb:
  %mov_11 = call float @llvm.amdgcn.interp.mov(i32 3, i32 1, i32 64, i32 256)
  %mov_12 = call float @llvm.amdgcn.interp.mov(i32 10, i32 4, i32 64, i32 256)

- store volatile float %mov_0, float addrspace(1)* undef
- store volatile float %mov_1, float addrspace(1)* undef
- store volatile float %mov_2, float addrspace(1)* undef
- store volatile float %mov_3, float addrspace(1)* undef
-
- store volatile float %mov_4, float addrspace(1)* undef
- store volatile float %mov_5, float addrspace(1)* undef
- store volatile float %mov_6, float addrspace(1)* undef
- store volatile float %mov_7, float addrspace(1)* undef
- store volatile float %mov_8, float addrspace(1)* undef
-
- store volatile float %mov_9, float addrspace(1)* undef
- store volatile float %mov_10, float addrspace(1)* undef
- store volatile float %mov_11, float addrspace(1)* undef
- store volatile float %mov_12, float addrspace(1)* undef
+ store volatile float %mov_0, ptr addrspace(1) undef
+ store volatile float %mov_1, ptr addrspace(1) undef
+ store volatile float %mov_2, ptr addrspace(1) undef
+ store volatile float %mov_3, ptr addrspace(1) undef
+
+ store volatile float %mov_4, ptr addrspace(1) undef
+ store volatile float %mov_5, ptr addrspace(1) undef
+ store volatile float %mov_6, ptr addrspace(1) undef
+ store volatile float %mov_7, ptr addrspace(1) undef
+ store volatile float %mov_8, ptr addrspace(1) undef
+
+ store volatile float %mov_9, ptr addrspace(1) undef
+ store volatile float %mov_10, ptr addrspace(1) undef
+ store volatile float %mov_11, ptr addrspace(1) undef
+ store volatile float %mov_12, ptr addrspace(1) undef
  ret void
}

@@ -170,12 +170,12 @@ bb:
; TODO-VI-DAG: v_interp_mov_f32_e32 v{{[0-9]+}}, p0, attr0.x{{$}}
; TODO-VI: s_mov_b32 m0, -1{{$}}
; TODO-VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
-;define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) #0 {
+;define amdgpu_ps void @v_interp_readnone(ptr addrspace(3) %lds) #0 {
;bb:
-; store float 0.000000e+00, float addrspace(3)* %lds
+; store float 0.000000e+00, ptr addrspace(3) %lds
; %tmp1 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 0)
-; %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
-; store float 0.000000e+00, float addrspace(3)* %tmp2
+; %tmp2 = getelementptr float, ptr addrspace(3) %lds, i32 4
+; store float 0.000000e+00, ptr addrspace(3) %tmp2
; call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp1, float %tmp1, float %tmp1, float %tmp1, i1 true, i1 true) #0
; ret void
;}

@@ -185,7 +185,7 @@ bb:
; GCN-LABEL: {{^}}v_interp_p1_bank16_bug:
; 16BANK-NOT: v_interp_p1_f32{{(_e32)*}} [[DST:v[0-9]+]], [[DST]]
-define amdgpu_ps void @v_interp_p1_bank16_bug([6 x <16 x i8>] addrspace(4)* inreg %arg, [17 x <16 x i8>] addrspace(4)* inreg %arg13, [17 x <4 x i32>] addrspace(4)* inreg %arg14, [34 x <8 x i32>] addrspace(4)* inreg %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) #0 {
+define amdgpu_ps void @v_interp_p1_bank16_bug(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg13, ptr addrspace(4) inreg %arg14, ptr addrspace(4) inreg %arg15, float inreg %arg16, i32 inreg %arg17, <2 x i32> %arg18, <2 x i32> %arg19, <2 x i32> %arg20, <3 x i32> %arg21, <2 x i32> %arg22, <2 x i32> %arg23, <2 x i32> %arg24, float %arg25, float %arg26, float %arg27, float %arg28, float %arg29, float %arg30, i32 %arg31, float %arg32, float %arg33) #0 {
main_body:
  %i.i = extractelement <2 x i32> %arg19, i32 0
  %j.i = extractelement <2 x i32> %arg19, i32 1

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
index 5cf6a1b4d11e27..5c314044d5ff5e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.intersect_ray.ll
@@ -160,7 +160,7 @@ main_body:

; TODO: NSA reassign is very limited and cannot work with VGPR tuples and subregs.
-define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr, float* %p_ray, <4 x i32> inreg %tdescr) {
+define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(ptr %p_node_ptr, ptr %p_ray, <4 x i32> inreg %tdescr) {
; GFX1013-LABEL: image_bvh_intersect_ray_nsa_reassign:
; GFX1013: ; %bb.0: ; %main_body
; GFX1013-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
@@ -240,10 +240,10 @@ define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr
; GFX11-NEXT: s_endpgm
main_body:
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep_node_ptr = getelementptr inbounds i32, i32* %p_node_ptr, i32 %lid
- %node_ptr = load i32, i32* %gep_node_ptr, align 4
- %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid
- %ray_extent = load float, float* %gep_ray, align 4
+ %gep_node_ptr = getelementptr inbounds i32, ptr %p_node_ptr, i32 %lid
+ %node_ptr = load i32, ptr %gep_node_ptr, align 4
+ %gep_ray = getelementptr inbounds float, ptr %p_ray, i32 %lid
+ %ray_extent = load float, ptr %gep_ray, align 4
  %ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0
  %ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1
  %ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2
@@ -254,11 +254,11 @@ main_body:
  %ray_inv_dir1 = insertelement <3 x float> %ray_inv_dir0, float 7.0, i32 1
  %ray_inv_dir = insertelement <3 x float> %ray_inv_dir1, float 8.0, i32 2
  %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr)
- store <4 x i32> %v, <4 x i32>* undef
+ store <4 x i32> %v, ptr undef
  ret void
}

-define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node_ptr, float* %p_ray, <4 x i32> inreg %tdescr) {
+define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(ptr %p_node_ptr, ptr %p_ray, <4 x i32> inreg %tdescr) {
; GFX1013-LABEL: image_bvh_intersect_ray_a16_nsa_reassign:
; GFX1013: ; %bb.0: ; %main_body
; GFX1013-NEXT: s_load_dwordx8 s[0:7], s[0:1], 0x24
@@ -330,10 +330,10 @@ define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node
; GFX11-NEXT: s_endpgm
main_body:
  %lid = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep_node_ptr = getelementptr inbounds i32, i32* %p_node_ptr, i32 %lid
- %node_ptr = load i32, i32* %gep_node_ptr, align 4
- %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid
- %ray_extent = load float, float* %gep_ray, align 4
+ %gep_node_ptr = getelementptr inbounds i32, ptr %p_node_ptr, i32 %lid
+ %node_ptr = load i32, ptr %gep_node_ptr, align 4
+ %gep_ray = getelementptr inbounds float, ptr %p_ray, i32 %lid
+ %ray_extent = load float, ptr %gep_ray, align 4
  %ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0
  %ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1
  %ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2
@@ -344,11 +344,11 @@ main_body:
  %ray_inv_dir1 = insertelement <3 x half> %ray_inv_dir0, half 7.0, i32 1
  %ray_inv_dir = insertelement <3 x half> %ray_inv_dir1, half 8.0, i32 2
  %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr)
- store <4 x i32> %v, <4 x i32>* undef
+ store <4 x i32> %v, ptr undef
  ret void
}

-define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray, <4 x i32> inreg %tdescr) {
+define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(ptr %p_ray, <4 x i32> inreg %tdescr) { ; GFX1013-LABEL: image_bvh64_intersect_ray_nsa_reassign: ; GFX1013: ; %bb.0: ; %main_body ; GFX1013-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 @@ -426,8 +426,8 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray, ; GFX11-NEXT: s_endpgm main_body: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid - %ray_extent = load float, float* %gep_ray, align 4 + %gep_ray = getelementptr inbounds float, ptr %p_ray, i32 %lid + %ray_extent = load float, ptr %gep_ray, align 4 %ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0 %ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1 %ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2 @@ -438,11 +438,11 @@ main_body: %ray_inv_dir1 = insertelement <3 x float> %ray_inv_dir0, float 7.0, i32 1 %ray_inv_dir = insertelement <3 x float> %ray_inv_dir1, float 8.0, i32 2 %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 1111111111111, float %ray_extent, <3 x float> %ray_origin, <3 x float> %ray_dir, <3 x float> %ray_inv_dir, <4 x i32> %tdescr) - store <4 x i32> %v, <4 x i32>* undef + store <4 x i32> %v, ptr undef ret void } -define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_ray, <4 x i32> inreg %tdescr) { +define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray, <4 x i32> inreg %tdescr) { ; GFX1013-LABEL: image_bvh64_intersect_ray_a16_nsa_reassign: ; GFX1013: ; %bb.0: ; %main_body ; GFX1013-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 @@ -512,8 +512,8 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_ ; GFX11-NEXT: s_endpgm main_body: %lid = tail call i32 @llvm.amdgcn.workitem.id.x() - %gep_ray = getelementptr inbounds float, float* %p_ray, i32 %lid - %ray_extent = load float, float* %gep_ray, align 4 + %gep_ray = getelementptr inbounds float, ptr %p_ray, i32 %lid + %ray_extent = load float, ptr %gep_ray, align 4 %ray_origin0 = insertelement <3 x float> undef, float 0.0, i32 0 %ray_origin1 = insertelement <3 x float> %ray_origin0, float 1.0, i32 1 %ray_origin = insertelement <3 x float> %ray_origin1, float 2.0, i32 2 @@ -524,7 +524,7 @@ main_body: %ray_inv_dir1 = insertelement <3 x half> %ray_inv_dir0, half 7.0, i32 1 %ray_inv_dir = insertelement <3 x half> %ray_inv_dir1, half 8.0, i32 2 %v = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 1111111111110, float %ray_extent, <3 x float> %ray_origin, <3 x half> %ray_dir, <3 x half> %ray_inv_dir, <4 x i32> %tdescr) - store <4 x i32> %v, <4 x i32>* undef + store <4 x i32> %v, ptr undef ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll index f5c137a056b735..7760c41ce8f41d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.private.ll @@ -8,13 +8,13 @@ ; GFX9: s_lshl_b32 [[APERTURE]], [[APERTURE]], 16 ; GCN: v_cmp_eq_u32_e32 vcc, [[APERTURE]], v[[PTR_HI]] ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc -define amdgpu_kernel void @is_private_vgpr(i8* addrspace(1)* %ptr.ptr) { +define amdgpu_kernel void @is_private_vgpr(ptr addrspace(1) %ptr.ptr) { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i8*, i8* addrspace(1)* %ptr.ptr, i32 %id - %ptr = load volatile i8*, i8* addrspace(1)* %gep - %val = 
call i1 @llvm.amdgcn.is.private(i8* %ptr) + %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id + %ptr = load volatile ptr, ptr addrspace(1) %gep + %val = call i1 @llvm.amdgcn.is.private(ptr %ptr) %ext = zext i1 %val to i32 - store i32 %ext, i32 addrspace(1)* undef + store i32 %ext, ptr addrspace(1) undef ret void } @@ -31,12 +31,12 @@ define amdgpu_kernel void @is_private_vgpr(i8* addrspace(1)* %ptr.ptr) { ; GCN: s_cmp_eq_u32 [[PTR_HI]], [[APERTURE]] ; GCN: s_cbranch_vccnz -define amdgpu_kernel void @is_private_sgpr(i8* %ptr) { - %val = call i1 @llvm.amdgcn.is.private(i8* %ptr) +define amdgpu_kernel void @is_private_sgpr(ptr %ptr) { + %val = call i1 @llvm.amdgcn.is.private(ptr %ptr) br i1 %val, label %bb0, label %bb1 bb0: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb1 bb1: @@ -44,6 +44,6 @@ bb1: } declare i32 @llvm.amdgcn.workitem.id.x() #0 -declare i1 @llvm.amdgcn.is.private(i8* nocapture) #0 +declare i1 @llvm.amdgcn.is.private(ptr nocapture) #0 attributes #0 = { nounwind readnone speculatable } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll index f98676b96439dc..69e49064c1c492 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.is.shared.ll @@ -9,13 +9,13 @@ ; GCN: v_cmp_eq_u32_e32 vcc, [[APERTURE]], v[[PTR_HI]] ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc -define amdgpu_kernel void @is_local_vgpr(i8* addrspace(1)* %ptr.ptr) { +define amdgpu_kernel void @is_local_vgpr(ptr addrspace(1) %ptr.ptr) { %id = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds i8*, i8* addrspace(1)* %ptr.ptr, i32 %id - %ptr = load volatile i8*, i8* addrspace(1)* %gep - %val = call i1 @llvm.amdgcn.is.shared(i8* %ptr) + %gep = getelementptr inbounds ptr, ptr addrspace(1) %ptr.ptr, i32 %id + %ptr = load volatile ptr, ptr addrspace(1) %gep + %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr) %ext = zext i1 %val to i32 - store i32 %ext, i32 addrspace(1)* undef + store i32 %ext, ptr addrspace(1) undef ret void } @@ -32,12 +32,12 @@ define amdgpu_kernel void @is_local_vgpr(i8* addrspace(1)* %ptr.ptr) { ; GCN: s_cmp_eq_u32 [[PTR_HI]], [[APERTURE]] ; GCN: s_cbranch_vccnz -define amdgpu_kernel void @is_local_sgpr(i8* %ptr) { - %val = call i1 @llvm.amdgcn.is.shared(i8* %ptr) +define amdgpu_kernel void @is_local_sgpr(ptr %ptr) { + %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr) br i1 %val, label %bb0, label %bb1 bb0: - store volatile i32 0, i32 addrspace(1)* undef + store volatile i32 0, ptr addrspace(1) undef br label %bb1 bb1: @@ -45,6 +45,6 @@ bb1: } declare i32 @llvm.amdgcn.workitem.id.x() #0 -declare i1 @llvm.amdgcn.is.shared(i8* nocapture) #0 +declare i1 @llvm.amdgcn.is.shared(ptr nocapture) #0 attributes #0 = { nounwind readnone speculatable } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll index 3ae0f77881d89b..0dadc392160689 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll @@ -10,12 +10,11 @@ ; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa ; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa -define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 { - %kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() - %header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)* - 
%gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10 - %value = load i32, i32 addrspace(4)* %gep - store i32 %value, i32 addrspace(1)* %out +define amdgpu_kernel void @test(ptr addrspace(1) %out) #1 { + %kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() + %gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10 + %value = load i32, ptr addrspace(4) %gep + store i32 %value, ptr addrspace(1) %out ret void } @@ -26,12 +25,11 @@ define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 { ; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15 ; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0x15 -define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 { - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %header.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10 - %value = load i32, i32 addrspace(4)* %gep - store i32 %value, i32 addrspace(1)* %out +define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 { + %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %gep = getelementptr i32, ptr addrspace(4) %implicitarg.ptr, i64 10 + %value = load i32, ptr addrspace(4) %gep + store i32 %value, ptr addrspace(1) %out ret void } @@ -47,11 +45,10 @@ define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 { ; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]] ; MESA: buffer_store_dword [[V_VAL]] ; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]] -define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x i8> %in) #1 { - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val = load i32, i32 addrspace(4)* %arg.ptr - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #1 { + %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %val = load i32, ptr addrspace(4) %implicitarg.ptr + store i32 %val, ptr addrspace(1) %out ret void } @@ -67,11 +64,10 @@ define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x ; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]] ; MESA: buffer_store_dword [[V_VAL]] ; HSA: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]] -define amdgpu_kernel void @opencl_test_implicit_alignment(i32 addrspace(1)* %out, <2 x i8> %in) #2 { - %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() - %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)* - %val = load i32, i32 addrspace(4)* %arg.ptr - store i32 %val, i32 addrspace(1)* %out +define amdgpu_kernel void @opencl_test_implicit_alignment(ptr addrspace(1) %out, <2 x i8> %in) #2 { + %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %val = load i32, ptr addrspace(4) %implicitarg.ptr + store i32 %val, ptr addrspace(1) %out ret void } @@ -84,11 +80,10 @@ define amdgpu_kernel void @opencl_test_implicit_alignment(i32 addrspace(1)* %out ; HSA: s_mov_b64 [[NULL:s\[[0-9]+:[0-9]+\]]], 0{{$}} ; HSA: s_load_dword s{{[0-9]+}}, [[NULL]], 0xa{{$}} define amdgpu_kernel void @test_no_kernargs() #1 { - %kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() - %header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)* - %gep = 
getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
- %value = load i32, i32 addrspace(4)* %gep
- store volatile i32 %value, i32 addrspace(1)* undef
+ %kernarg.segment.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
+ %gep = getelementptr i32, ptr addrspace(4) %kernarg.segment.ptr, i64 10
+ %value = load i32, ptr addrspace(4) %gep
+ store volatile i32 %value, ptr addrspace(1) undef
 ret void
 }

@@ -97,10 +92,9 @@ define amdgpu_kernel void @test_no_kernargs() #1 {
 ; OS-MESA3D: kernarg_segment_byte_size = 16
 ; CO-V2: kernarg_segment_alignment = 4
 define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 {
- %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
- %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
- %val = load volatile i32, i32 addrspace(4)* %arg.ptr
- store volatile i32 %val, i32 addrspace(1)* null
+ %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
+ store volatile i32 %val, ptr addrspace(1) null
 ret void
 }

@@ -109,15 +103,14 @@ define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs()
 ; OS-MESA3D: kernarg_segment_byte_size = 16
 ; CO-V2: kernarg_segment_alignment = 4
 define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 {
- %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
- %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
- %val = load volatile i32, i32 addrspace(4)* %arg.ptr
- store volatile i32 %val, i32 addrspace(1)* null
+ %implicitarg.ptr = call noalias ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ %val = load volatile i32, ptr addrspace(4) %implicitarg.ptr
+ store volatile i32 %val, ptr addrspace(1) null
 ret void
 }

-declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
-declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
+declare ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() #0
+declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0

 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll
index 0d172792cf2053..b9934f194df751 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll
@@ -74,13 +74,13 @@ define amdgpu_kernel void @ldexp_f16(
 ; GFX11-NEXT: buffer_store_b16 v0, off, s[8:11], 0
 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT: s_endpgm
- half addrspace(1)* %r,
- half addrspace(1)* %a,
- i32 addrspace(1)* %b) {
- %a.val = load half, half addrspace(1)* %a
- %b.val = load i32, i32 addrspace(1)* %b
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b) {
+ %a.val = load half, ptr addrspace(1) %a
+ %b.val = load i32, ptr addrspace(1) %b
 %r.val = call half @llvm.amdgcn.ldexp.f16(half %a.val, i32 %b.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
 ret void
 }

@@ -139,11 +139,11 @@ define amdgpu_kernel void @ldexp_f16_imm_a(
 ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT: s_endpgm
- half addrspace(1)* %r,
- i32 addrspace(1)* %b) {
- %b.val = load i32, i32 addrspace(1)* %b
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %b) {
+ %b.val = load i32, ptr addrspace(1) %b
 %r.val = call half @llvm.amdgcn.ldexp.f16(half 2.0, i32
%b.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } @@ -202,10 +202,10 @@ define amdgpu_kernel void @ldexp_f16_imm_b( ; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm - half addrspace(1)* %r, - half addrspace(1)* %a) { - %a.val = load half, half addrspace(1)* %a + ptr addrspace(1) %r, + ptr addrspace(1) %a) { + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.amdgcn.ldexp.f16(half %a.val, i32 2) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll index 1ab4e8b8063003..d4282061a3fceb 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.ll @@ -7,25 +7,25 @@ declare double @llvm.amdgcn.ldexp.f64(double, i32) nounwind readnone ; SI-LABEL: {{^}}test_ldexp_f32: ; SI: v_ldexp_f32 ; SI: s_endpgm -define amdgpu_kernel void @test_ldexp_f32(float addrspace(1)* %out, float %a, i32 %b) nounwind { +define amdgpu_kernel void @test_ldexp_f32(ptr addrspace(1) %out, float %a, i32 %b) nounwind { %result = call float @llvm.amdgcn.ldexp.f32(float %a, i32 %b) nounwind readnone - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } ; SI-LABEL: {{^}}test_ldexp_f64: ; SI: v_ldexp_f64 ; SI: s_endpgm -define amdgpu_kernel void @test_ldexp_f64(double addrspace(1)* %out, double %a, i32 %b) nounwind { +define amdgpu_kernel void @test_ldexp_f64(ptr addrspace(1) %out, double %a, i32 %b) nounwind { %result = call double @llvm.amdgcn.ldexp.f64(double %a, i32 %b) nounwind readnone - store double %result, double addrspace(1)* %out, align 8 + store double %result, ptr addrspace(1) %out, align 8 ret void } ; SI-LABEL: {{^}}test_ldexp_undef_f32: ; SI-NOT: v_ldexp_f32 -define amdgpu_kernel void @test_ldexp_undef_f32(float addrspace(1)* %out, i32 %b) nounwind { +define amdgpu_kernel void @test_ldexp_undef_f32(ptr addrspace(1) %out, i32 %b) nounwind { %result = call float @llvm.amdgcn.ldexp.f32(float undef, i32 %b) nounwind readnone - store float %result, float addrspace(1)* %out, align 4 + store float %result, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll index 4e8cc7ba8d4f50..61818dafd2b84c 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lds.kernel.id.ll @@ -4,7 +4,7 @@ declare i32 @llvm.amdgcn.lds.kernel.id() declare i32 @llvm.amdgcn.workgroup.id.x() -define void @function_lds_id(i32 addrspace(1)* %out) { +define void @function_lds_id(ptr addrspace(1) %out) { ; GCN-LABEL: function_lds_id: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -16,11 +16,11 @@ define void @function_lds_id(i32 addrspace(1)* %out) { %tmp0 = call i32 @llvm.amdgcn.lds.kernel.id() %help = call i32 @llvm.amdgcn.workgroup.id.x() %both = add i32 %tmp0, %help - store i32 %both, i32 addrspace(1)* %out + store i32 %both, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @kernel_lds_id(i32 addrspace(1)* %out) !llvm.amdgcn.lds.kernel.id !0 { +define amdgpu_kernel void @kernel_lds_id(ptr addrspace(1) %out) !llvm.amdgcn.lds.kernel.id !0 { ; GCN-LABEL: kernel_lds_id: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -34,11 +34,11 @@ 
define amdgpu_kernel void @kernel_lds_id(i32 addrspace(1)* %out) !llvm.amdgcn.ld %tmp0 = call i32 @llvm.amdgcn.lds.kernel.id() %help = call i32 @llvm.amdgcn.workgroup.id.x() %both = add i32 %tmp0, %help - store i32 %both, i32 addrspace(1)* %out + store i32 %both, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @indirect_lds_id(i32 addrspace(1)* %out) !llvm.amdgcn.lds.kernel.id !1 { +define amdgpu_kernel void @indirect_lds_id(ptr addrspace(1) %out) !llvm.amdgcn.lds.kernel.id !1 { ; GCN-LABEL: indirect_lds_id: ; GCN: ; %bb.0: ; GCN-NEXT: s_mov_b32 s32, 0 @@ -59,11 +59,11 @@ define amdgpu_kernel void @indirect_lds_id(i32 addrspace(1)* %out) !llvm.amdgcn. ; GCN-NEXT: v_mov_b32_e32 v1, s5 ; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7] ; GCN-NEXT: s_endpgm - call void @function_lds_id(i32 addrspace(1) * %out) + call void @function_lds_id(ptr addrspace(1) %out) ret void } -define amdgpu_kernel void @doesnt_use_it(i32 addrspace(1)* %out) !llvm.amdgcn.lds.kernel.id !0 { +define amdgpu_kernel void @doesnt_use_it(ptr addrspace(1) %out) !llvm.amdgcn.lds.kernel.id !0 { ; GCN-LABEL: doesnt_use_it: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 @@ -73,7 +73,7 @@ define amdgpu_kernel void @doesnt_use_it(i32 addrspace(1)* %out) !llvm.amdgcn.ld ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: s_endpgm - store i32 100, i32 addrspace(1)* %out + store i32 100, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll index 294d11abbecdbb..92bdbe5d801ccf 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.lerp.ll @@ -5,9 +5,9 @@ declare i32 @llvm.amdgcn.lerp(i32, i32, i32) #0 ; GCN-LABEL: {{^}}v_lerp: ; GCN: v_lerp_u8 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -define amdgpu_kernel void @v_lerp(i32 addrspace(1)* %out, i32 %src) nounwind { +define amdgpu_kernel void @v_lerp(ptr addrspace(1) %out, i32 %src) nounwind { %result= call i32 @llvm.amdgcn.lerp(i32 %src, i32 100, i32 100) #0 - store i32 %result, i32 addrspace(1)* %out, align 4 + store i32 %result, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll index a96127d65c29fa..7ed8b8160d03a9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.log.clamp.ll @@ -7,9 +7,9 @@ declare float @llvm.amdgcn.log.clamp.f32(float) #0 ; GCN-LABEL: {{^}}v_log_clamp_f32: ; GCN: v_log_clamp_f32_e32 {{v[0-9]+}}, {{s[0-9]+}} -define amdgpu_kernel void @v_log_clamp_f32(float addrspace(1)* %out, float %src) #1 { +define amdgpu_kernel void @v_log_clamp_f32(ptr addrspace(1) %out, float %src) #1 { %log.clamp = call float @llvm.amdgcn.log.clamp.f32(float %src) #0 - store float %log.clamp, float addrspace(1)* %out + store float %log.clamp, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll index 88d6bea38b1004..45c3a1a32c682e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mbcnt.ll @@ -5,7 +5,7 @@ ; GCN: v_mbcnt_lo_u32_b32{{(_e64)*}} [[LO:v[0-9]+]], -1, 0 ; SI: v_mbcnt_hi_u32_b32_e32 {{v[0-9]+}}, -1, [[LO]] ; VI: v_mbcnt_hi_u32_b32 {{v[0-9]+}}, -1, [[LO]] -define amdgpu_ps void @mbcnt_intrinsics(<16 x i8> addrspace(4)* inreg %arg, <16 x i8> addrspace(4)* inreg %arg1, <32 x i8> addrspace(4)* inreg 
%arg2, i32 inreg %arg3) { +define amdgpu_ps void @mbcnt_intrinsics(ptr addrspace(4) inreg %arg, ptr addrspace(4) inreg %arg1, ptr addrspace(4) inreg %arg2, i32 inreg %arg3) { main_body: %lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 %hi = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lo) #0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll index 95adef63bc6cb1..c9cee8be59e0d2 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.bf16.ll @@ -52,13 +52,13 @@ declare i32 @llvm.amdgcn.workitem.id.x() ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}], -define amdgpu_kernel void @test_mfma_f32_32x32x2bf16(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x2bf16(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg + %in.1 = load <32 x float>, ptr addrspace(1) %arg %a = bitcast i32 1 to <2 x i16> %b = bitcast i32 2 to <2 x i16> %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x2bf16(<2 x i16> %a, <2 x i16> %b, <32 x float> %in.1, i32 1, i32 2, i32 3) - store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg + store <32 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -73,13 +73,13 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}], -define amdgpu_kernel void @test_mfma_f32_16x16x2bf16(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x2bf16(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %a = bitcast i32 1 to <2 x i16> %b = bitcast i32 2 to <2 x i16> %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x2bf16(<2 x i16> %a, <2 x i16> %b, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -94,13 +94,13 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A: global_store_dwordx4 v{{[0-9]+}}, [[RES]], -define amdgpu_kernel void @test_mfma_f32_4x4x2bf16(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_4x4x2bf16(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %a = bitcast i32 1 to <2 x i16> %b = bitcast i32 2 to <2 x i16> %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x2bf16(<2 x i16> %a, <2 x i16> %b, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -115,13 +115,13 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}], -define amdgpu_kernel void @test_mfma_f32_32x32x4bf16(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x4bf16(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %a = bitcast i32 1 to <2 x i16> %b = bitcast i32 2 to <2 x i16> %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16(<2 x i16> %a, <2 x i16> %b, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> 
%mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -136,13 +136,13 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A: global_store_dwordx4 v{{[0-9]+}}, [[RES]], -define amdgpu_kernel void @test_mfma_f32_16x16x8bf16(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x8bf16(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %a = bitcast i32 1 to <2 x i16> %b = bitcast i32 2 to <2 x i16> %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8bf16(<2 x i16> %a, <2 x i16> %b, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll index 72a43274a435ca..e28fba285cb7cb 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll @@ -20,13 +20,13 @@ declare i32 @llvm.amdgcn.workitem.id.x() ; GFX940: v_mfma_f32_32x32x4_2b_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg + %in.1 = load <32 x float>, ptr addrspace(1) %arg %a = bitcast i64 1 to <4 x i16> %b = bitcast i64 2 to <4 x i16> %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <32 x float> %in.1, i32 1, i32 2, i32 3) - store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg + store <32 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -39,13 +39,13 @@ bb: ; GFX940: v_mfma_f32_16x16x4_4b_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %a = bitcast i64 1 to <4 x i16> %b = bitcast i64 2 to <4 x i16> %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -58,13 +58,13 @@ bb: ; GFX940: v_mfma_f32_4x4x4_16b_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]], -define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %a = bitcast i64 1 to <4 x i16> %b = bitcast i64 2 to <4 x i16> %mai.1 = 
tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4bf16.1k(<4 x i16> %a, <4 x i16> %b, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -77,13 +77,13 @@ bb: ; GFX940: v_mfma_f32_32x32x8_bf16 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %a = bitcast i64 1 to <4 x i16> %b = bitcast i64 2 to <4 x i16> %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8bf16.1k(<4 x i16> %a, <4 x i16> %b, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -96,13 +96,13 @@ bb: ; GFX940: v_mfma_f32_16x16x16_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9+]}}], v[[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]], -define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %a = bitcast i64 1 to <4 x i16> %b = bitcast i64 2 to <4 x i16> %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16bf16.1k(<4 x i16> %a, <4 x i16> %b, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -112,11 +112,11 @@ bb: ; GFX940: v_mfma_f64_4x4x4_4b_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}} ; GFX940: v_mfma_f64_4x4x4_4b_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0] ; GCN: global_store_dwordx2 -define amdgpu_kernel void @test_mfma_f64_4x4x4f64(double addrspace(1)* %arg, double %a, double %b) #0 { +define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double %a, double %b) #0 { bb: %mai.1 = tail call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %a, double %b, double 0.0, i32 0, i32 0, i32 0) %mai.2 = tail call double @llvm.amdgcn.mfma.f64.4x4x4f64(double %a, double %b, double %mai.1, i32 1, i32 2, i32 3) - store double %mai.2, double addrspace(1)* %arg + store double %mai.2, ptr addrspace(1) %arg ret void } @@ -126,11 +126,11 @@ bb: ; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 neg:[1,1,0] ; GCN: global_store_dwordx4 ; GCN: global_store_dwordx4 -define amdgpu_kernel void @test_mfma_f64_16x16x4f64(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 { +define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, double %a, double %b) #0 { bb: - %in.1 = load <4 x double>, <4 x double> addrspace(1)* %arg + %in.1 = load <4 x double>, ptr addrspace(1) %arg %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> %in.1, i32 1, i32 2, i32 3) - store <4 x double> 
%mai.1, <4 x double> addrspace(1)* %arg + store <4 x double> %mai.1, ptr addrspace(1) %arg ret void } @@ -141,11 +141,11 @@ bb: ; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0] ; GCN: global_store_dwordx4 ; GCN: global_store_dwordx4 -define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 { +define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(ptr addrspace(1) %arg, double %a, double %b) #0 { bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> , i32 0, i32 0, i32 0) %mai.2 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> %mai.1, i32 1, i32 2, i32 3) - store <4 x double> %mai.2, <4 x double> addrspace(1)* %arg + store <4 x double> %mai.2, ptr addrspace(1) %arg ret void } @@ -154,10 +154,10 @@ bb: ; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}} ; GCN: global_store_dwordx4 ; GCN: global_store_dwordx4 -define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 { +define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, double %a, double %b) #0 { bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> , i32 0, i32 0, i32 0) - store <4 x double> %mai.1, <4 x double> addrspace(1)* %arg + store <4 x double> %mai.1, ptr addrspace(1) %arg ret void } @@ -168,10 +168,10 @@ bb: ; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}} ; GCN: global_store_dwordx4 ; GCN: global_store_dwordx4 -define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 { +define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %arg, double %a, double %b) #0 { bb: %mai.1 = tail call <4 x double> @llvm.amdgcn.mfma.f64.16x16x4f64(double %a, double %b, <4 x double> , i32 0, i32 0, i32 0) - store <4 x double> %mai.1, <4 x double> addrspace(1)* %arg + store <4 x double> %mai.1, ptr addrspace(1) %arg ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll index bec1fb3ad2d86d..d5b6d4449d6758 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll @@ -40,11 +40,11 @@ declare <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.fp8(<2 x i32>, <4 x i3 ; GISEL: v_mfma_i32_16x16x32_i8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_i32_16x16x32i8(<4 x i32> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_i32_16x16x32i8(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <4 x i32>, <4 x i32> addrspace(1)* %arg + %in.1 = load <4 x i32>, ptr addrspace(1) %arg %mai.1 = tail call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64 4294967298, i64 12884901892, <4 x i32> %in.1, i32 1, i32 2, i32 3) - store <4 x i32> %mai.1, <4 x i32> addrspace(1)* %arg + store <4 x i32> %mai.1, ptr addrspace(1) %arg ret void } @@ -58,11 +58,11 @@ bb: ; GISEL: v_mfma_i32_32x32x16_i8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 
v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_i32_32x32x16i8(<16 x i32> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_i32_32x32x16i8(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <16 x i32>, <16 x i32> addrspace(1)* %arg + %in.1 = load <16 x i32>, ptr addrspace(1) %arg %mai.1 = tail call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x16.i8(i64 4294967298, i64 12884901892, <16 x i32> %in.1, i32 1, i32 2, i32 3) - store <16 x i32> %mai.1, <16 x i32> addrspace(1)* %arg + store <16 x i32> %mai.1, ptr addrspace(1) %arg ret void } @@ -76,11 +76,11 @@ bb: ; GISEL: v_mfma_f32_16x16x8_xf32 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_16x16x8xf32(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x8xf32(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> , <2 x float> , <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -94,11 +94,11 @@ bb: ; GISEL: v_mfma_f32_32x32x4_xf32 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_32x32x4xf32(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x4xf32(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> , <2 x float> , <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -112,11 +112,11 @@ bb: ; GISEL: v_mfma_f32_16x16x32_bf8_bf8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_16x16x32_bf8_bf8(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x32_bf8_bf8(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.bf8(i64 4294967298, i64 12884901892, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -130,11 +130,11 @@ bb: ; GISEL: v_mfma_f32_16x16x32_bf8_fp8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_16x16x32_bf8_fp8(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x32_bf8_fp8(ptr addrspace(1) %arg) #0 { bb: - %in.1 = 
load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.fp8(i64 4294967298, i64 12884901892, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -148,11 +148,11 @@ bb: ; GISEL: v_mfma_f32_16x16x32_fp8_bf8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_16x16x32_fp8_bf8(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x32_fp8_bf8(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.bf8(i64 4294967298, i64 12884901892, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -166,11 +166,11 @@ bb: ; GISEL: v_mfma_f32_16x16x32_fp8_fp8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_16x16x32_fp8_fp8(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x32_fp8_fp8(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.fp8(i64 4294967298, i64 12884901892, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -184,11 +184,11 @@ bb: ; GISEL: v_mfma_f32_32x32x16_bf8_bf8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_32x32x16_bf8_bf8(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x16_bf8_bf8(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.bf8(i64 4294967298, i64 12884901892, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -202,11 +202,11 @@ bb: ; GISEL: v_mfma_f32_32x32x16_bf8_fp8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_32x32x16_bf8_fp8(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x16_bf8_fp8(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.fp8(i64 4294967298, i64 12884901892, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 
x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -220,11 +220,11 @@ bb: ; GISEL: v_mfma_f32_32x32x16_fp8_bf8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_32x32x16_fp8_bf8(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x16_fp8_bf8(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.bf8(i64 4294967298, i64 12884901892, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -238,11 +238,11 @@ bb: ; GISEL: v_mfma_f32_32x32x16_fp8_fp8 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3 ; GCN-NOT: v_accvgpr_read_b32 ; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_32x32x16_fp8_fp8(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x16_fp8_fp8(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 4294967298, i64 12884901892, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -254,11 +254,11 @@ bb: ; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}} ; GCN: v_smfmac_f32_16x16x32_f16 [[CD]][[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 ; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]][[[RLO]]:[[RHI]]] -define amdgpu_kernel void @test_smfmac_f32_16x16x32_f16(<4 x float> addrspace(1)* %arg, <4 x half> %a, <8 x half> %b, i32 %idx) #0 { +define amdgpu_kernel void @test_smfmac_f32_16x16x32_f16(ptr addrspace(1) %arg, <4 x half> %a, <8 x half> %b, i32 %idx) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %mai.1 = tail call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.f16(<4 x half> %a, <8 x half> %b, <4 x float> %in.1, i32 %idx, i32 1, i32 2) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -273,11 +273,11 @@ bb: ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48 -define amdgpu_kernel void @test_smfmac_f32_32x32x16_f16(<16 x float> addrspace(1)* %arg, <4 x half> %a, <8 x half> %b, i32 %idx) #0 { +define amdgpu_kernel void @test_smfmac_f32_32x32x16_f16(ptr addrspace(1) %arg, <4 x half> %a, <8 x half> %b, i32 %idx) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.f16(<4 x half> %a, <8 x half> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store 
<16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -289,11 +289,11 @@ bb: ; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}} ; GCN: v_smfmac_f32_16x16x32_bf16 [[CD]][[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 ; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]][[[RLO]]:[[RHI]]] -define amdgpu_kernel void @test_smfmac_f32_16x16x32_bf16(<4 x float> addrspace(1)* %arg, <4 x i16> %a, <8 x i16> %b, i32 %idx) #0 { +define amdgpu_kernel void @test_smfmac_f32_16x16x32_bf16(ptr addrspace(1) %arg, <4 x i16> %a, <8 x i16> %b, i32 %idx) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %mai.1 = tail call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.bf16(<4 x i16> %a, <8 x i16> %b, <4 x float> %in.1, i32 %idx, i32 1, i32 2) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -308,11 +308,11 @@ bb: ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48 -define amdgpu_kernel void @test_smfmac_f32_32x32x16_bf16(<16 x float> addrspace(1)* %arg, <4 x i16> %a, <8 x i16> %b, i32 %idx) #0 { +define amdgpu_kernel void @test_smfmac_f32_32x32x16_bf16(ptr addrspace(1) %arg, <4 x i16> %a, <8 x i16> %b, i32 %idx) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.bf16(<4 x i16> %a, <8 x i16> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -324,11 +324,11 @@ bb: ; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}} ; GCN: v_smfmac_i32_16x16x64_i8 [[CD]][[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 ; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]][[[RLO]]:[[RHI]]] -define amdgpu_kernel void @test_smfmac_i32_16x16x64_i8(<4 x i32> addrspace(1)* %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { +define amdgpu_kernel void @test_smfmac_i32_16x16x64_i8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { bb: - %in.1 = load <4 x i32>, <4 x i32> addrspace(1)* %arg + %in.1 = load <4 x i32>, ptr addrspace(1) %arg %mai.1 = tail call <4 x i32> @llvm.amdgcn.smfmac.i32.16x16x64.i8(<2 x i32> %a, <4 x i32> %b, <4 x i32> %in.1, i32 %idx, i32 1, i32 2) - store <4 x i32> %mai.1, <4 x i32> addrspace(1)* %arg + store <4 x i32> %mai.1, ptr addrspace(1) %arg ret void } @@ -343,11 +343,11 @@ bb: ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48 -define amdgpu_kernel void @test_smfmac_i32_32x32x32_i8(<16 x i32> addrspace(1)* %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { +define amdgpu_kernel void @test_smfmac_i32_32x32x32_i8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { bb: - %in.1 = load <16 x i32>, <16 x i32> addrspace(1)* %arg + %in.1 = load <16 x i32>, ptr addrspace(1) %arg %mai.1 = tail call <16 x i32> @llvm.amdgcn.smfmac.i32.32x32x32.i8(<2 x i32> %a, 
<4 x i32> %b, <16 x i32> %in.1, i32 %idx, i32 1, i32 2) - store <16 x i32> %mai.1, <16 x i32> addrspace(1)* %arg + store <16 x i32> %mai.1, ptr addrspace(1) %arg ret void } @@ -359,11 +359,11 @@ bb: ; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}} ; GCN: v_smfmac_f32_16x16x64_bf8_bf8 [[CD]]{{\[}}[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 ; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]]{{\[}}[[RLO]]:[[RHI]]] -define amdgpu_kernel void @test_smfmac_i32_16x16x64_bf8_bf8(<4 x float> addrspace(1)* %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { +define amdgpu_kernel void @test_smfmac_i32_16x16x64_bf8_bf8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %mai.1 = tail call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf8.bf8(<2 x i32> %a, <4 x i32> %b, <4 x float> %in.1, i32 %idx, i32 1, i32 2) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -375,11 +375,11 @@ bb: ; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}} ; GCN: v_smfmac_f32_16x16x64_bf8_fp8 [[CD]]{{\[}}[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 ; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]]{{\[}}[[RLO]]:[[RHI]]] -define amdgpu_kernel void @test_smfmac_i32_16x16x64_bf8_fp8(<4 x float> addrspace(1)* %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { +define amdgpu_kernel void @test_smfmac_i32_16x16x64_bf8_fp8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %mai.1 = tail call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.bf8.fp8(<2 x i32> %a, <4 x i32> %b, <4 x float> %in.1, i32 %idx, i32 1, i32 2) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -391,11 +391,11 @@ bb: ; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}} ; GCN: v_smfmac_f32_16x16x64_fp8_bf8 [[CD]]{{\[}}[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 ; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]]{{\[}}[[RLO]]:[[RHI]]] -define amdgpu_kernel void @test_smfmac_i32_16x16x64_fp8_bf8(<4 x float> addrspace(1)* %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { +define amdgpu_kernel void @test_smfmac_i32_16x16x64_fp8_bf8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %mai.1 = tail call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.fp8.bf8(<2 x i32> %a, <4 x i32> %b, <4 x float> %in.1, i32 %idx, i32 1, i32 2) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -407,11 +407,11 @@ bb: ; AGPRCD-DAG: v_accvgpr_write_b32 a[[RHI:[0-9]+]], s[[SHI]]{{$}} ; GCN: v_smfmac_f32_16x16x64_fp8_fp8 [[CD]]{{\[}}[[RLO]]:[[RHI]]], {{[av]}}[{{[0-9:]+}}], {{[av]}}[{{[0-9:]+}}], v{{[0-9]+}} cbsz:1 abid:2 ; GCN: global_store_dwordx4 v{{[0-9]+}}, [[CD]]{{\[}}[[RLO]]:[[RHI]]] -define amdgpu_kernel void @test_smfmac_i32_16x16x64_fp8_fp8(<4 x float> addrspace(1)* %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { +define amdgpu_kernel void @test_smfmac_i32_16x16x64_fp8_fp8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { 
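; Every hunk in this patch applies the same mechanical rewrite from typed
; pointers to the opaque pointer type. A minimal sketch of the rule on a
; hypothetical global-memory load (illustration only, not a test from this
; patch):
;
;   typed:   %v = load <4 x float>, <4 x float> addrspace(1)* %arg
;   opaque:  %v = load <4 x float>, ptr addrspace(1) %arg
;
; The pointee type survives only where an instruction still needs it (the
; value type of a load or store, the source element type of a getelementptr),
; which is why the pointer-to-pointer bitcasts in the kernarg tests above are
; deleted outright rather than rewritten.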
bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %mai.1 = tail call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x64.fp8.fp8(<2 x i32> %a, <4 x i32> %b, <4 x float> %in.1, i32 %idx, i32 1, i32 2) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -426,11 +426,11 @@ bb: ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48 -define amdgpu_kernel void @test_smfmac_i32_32x32x32_bf8_bf8(<16 x float> addrspace(1)* %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { +define amdgpu_kernel void @test_smfmac_i32_32x32x32_bf8_bf8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf8.bf8(<2 x i32> %a, <4 x i32> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -445,11 +445,11 @@ bb: ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48 -define amdgpu_kernel void @test_smfmac_i32_32x32x32_bf8_fp8(<16 x float> addrspace(1)* %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { +define amdgpu_kernel void @test_smfmac_i32_32x32x32_bf8_fp8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.bf8.fp8(<2 x i32> %a, <4 x i32> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -464,11 +464,11 @@ bb: ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48 -define amdgpu_kernel void @test_smfmac_i32_32x32x32_fp8_bf8(<16 x float> addrspace(1)* %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { +define amdgpu_kernel void @test_smfmac_i32_32x32x32_fp8_bf8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.bf8(<2 x i32> %a, <4 x i32> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -483,11 +483,11 @@ bb: ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:16 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9:]+}}], s[{{[0-9:]+}}] offset:32 ; GCN-DAG: global_store_dwordx4 v{{[0-9]+}}, [[CD]][{{[0-9]+}}:[[RHI]]], s[{{[0-9:]+}}] offset:48 -define 
amdgpu_kernel void @test_smfmac_i32_32x32x32_fp8_fp8(<16 x float> addrspace(1)* %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { +define amdgpu_kernel void @test_smfmac_i32_32x32x32_fp8_fp8(ptr addrspace(1) %arg, <2 x i32> %a, <4 x i32> %b, i32 %idx) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %mai.1 = tail call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x32.fp8.fp8(<2 x i32> %a, <4 x i32> %b, <16 x float> %in.1, i32 %idx, i32 1, i32 2) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll index ee041ca226c3c1..b5beaa504297cb 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.i8.ll @@ -16,11 +16,11 @@ declare <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32, i32, <4 x i32>, i32, i32 ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_i32_32x32x8i8(<16 x i32> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_i32_32x32x8i8(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <16 x i32>, <16 x i32> addrspace(1)* %arg + %in.1 = load <16 x i32>, ptr addrspace(1) %arg %mai.1 = tail call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x8i8(i32 1, i32 2, <16 x i32> %in.1, i32 1, i32 2, i32 3) - store <16 x i32> %mai.1, <16 x i32> addrspace(1)* %arg + store <16 x i32> %mai.1, ptr addrspace(1) %arg ret void } @@ -35,11 +35,11 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A: global_store_dwordx4 v{{[0-9]+}}, [[RES]] -define amdgpu_kernel void @test_mfma_i32_16x16x16i8(<4 x i32> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_i32_16x16x16i8(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <4 x i32>, <4 x i32> addrspace(1)* %arg + %in.1 = load <4 x i32>, ptr addrspace(1) %arg %mai.1 = tail call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x16i8(i32 1, i32 2, <4 x i32> %in.1, i32 1, i32 2, i32 3) - store <4 x i32> %mai.1, <4 x i32> addrspace(1)* %arg + store <4 x i32> %mai.1, ptr addrspace(1) %arg ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll index 5dabf3d7d9021c..d3294c61beb4ab 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll @@ -68,11 +68,11 @@ declare i32 @llvm.amdgcn.workitem.id.x() ; GFX908-COUNT-2: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}] ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_32x32x1f32(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x1f32(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg + %in.1 = load <32 x float>, ptr addrspace(1) %arg %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 1, i32 2, i32 3) - store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg + store <32 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -88,11 +88,11 @@ bb: ; GFX908-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}] ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_16x16x1f32(<16 
x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x1f32(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float 1.0, float 2.0, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -108,11 +108,11 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]] -define amdgpu_kernel void @test_mfma_f32_4x4x1f32(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_4x4x1f32(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 2.0, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -128,11 +128,11 @@ bb: ; GFX908-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}] ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_32x32x2f32(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x2f32(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x2f32(float 1.0, float 2.0, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -148,11 +148,11 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]], -define amdgpu_kernel void @test_mfma_f32_16x16x4f32(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x4f32(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x4f32(float 1.0, float 2.0, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -167,14 +167,14 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-8: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}], -define amdgpu_kernel void @test_mfma_f32_32x32x4f16(<32 x float> addrspace(1)* %arg, <4 x half> addrspace(1)* %c) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x4f16(ptr addrspace(1) %arg, ptr addrspace(1) %c) #0 { bb: - %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg - %c.1 = load <4 x half>, <4 x half> addrspace(1)* %c - %c2p = getelementptr <4 x half>, <4 x half> addrspace(1)* %c, i64 1 - %c.2 = load <4 x half>, <4 x half> addrspace(1)* %c2p + %in.1 = load <32 x float>, ptr addrspace(1) %arg + %c.1 = load <4 x half>, ptr addrspace(1) %c + %c2p = getelementptr <4 x half>, ptr addrspace(1) %c, i64 1 + %c.2 = load <4 x half>, ptr addrspace(1) %c2p %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x4f16(<4 x half> %c.1, <4 x half> %c.2, <32 x float> %in.1, i32 1, i32 2, i32 3) - store <32 x float> %mai.1, 
<32 x float> addrspace(1)* %arg + store <32 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -188,14 +188,14 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-4: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}], -define amdgpu_kernel void @test_mfma_f32_16x16x4f16(<16 x float> addrspace(1)* %arg, <4 x half> addrspace(1)* %c) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x4f16(ptr addrspace(1) %arg, ptr addrspace(1) %c) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg - %c.1 = load <4 x half>, <4 x half> addrspace(1)* %c - %c2p = getelementptr <4 x half>, <4 x half> addrspace(1)* %c, i64 1 - %c.2 = load <4 x half>, <4 x half> addrspace(1)* %c2p + %in.1 = load <16 x float>, ptr addrspace(1) %arg + %c.1 = load <4 x half>, ptr addrspace(1) %c + %c2p = getelementptr <4 x half>, ptr addrspace(1) %c, i64 1 + %c.2 = load <4 x half>, ptr addrspace(1) %c2p %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x4f16(<4 x half> %c.1, <4 x half> %c.2, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -210,14 +210,14 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]], -define amdgpu_kernel void @test_mfma_f32_4x4x4f16(<4 x float> addrspace(1)* %arg, <4 x half> addrspace(1)* %c) #0 { +define amdgpu_kernel void @test_mfma_f32_4x4x4f16(ptr addrspace(1) %arg, ptr addrspace(1) %c) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg - %c.1 = load <4 x half>, <4 x half> addrspace(1)* %c - %c2p = getelementptr <4 x half>, <4 x half> addrspace(1)* %c, i64 1 - %c.2 = load <4 x half>, <4 x half> addrspace(1)* %c2p + %in.1 = load <4 x float>, ptr addrspace(1) %arg + %c.1 = load <4 x half>, ptr addrspace(1) %c + %c2p = getelementptr <4 x half>, ptr addrspace(1) %c, i64 1 + %c.2 = load <4 x half>, ptr addrspace(1) %c2p %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x4f16(<4 x half> %c.1, <4 x half> %c.2, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -233,14 +233,14 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-4: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}], -define amdgpu_kernel void @test_mfma_f32_32x32x8f16(<16 x float> addrspace(1)* %arg, <4 x half> addrspace(1)* %c) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x8f16(ptr addrspace(1) %arg, ptr addrspace(1) %c) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg - %c.1 = load <4 x half>, <4 x half> addrspace(1)* %c - %c2p = getelementptr <4 x half>, <4 x half> addrspace(1)* %c, i64 1 - %c.2 = load <4 x half>, <4 x half> addrspace(1)* %c2p + %in.1 = load <16 x float>, ptr addrspace(1) %arg + %c.1 = load <4 x half>, ptr addrspace(1) %c + %c2p = getelementptr <4 x half>, ptr addrspace(1) %c, i64 1 + %c.2 = load <4 x half>, ptr addrspace(1) %c2p %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> %c.1, <4 x half> %c.2, <16 x float> %in.1, i32 1, i32 2, i32 3) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -255,14 +255,14 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]], -define amdgpu_kernel void @test_mfma_f32_16x16x16f16(<4 x 
float> addrspace(1)* %arg, <4 x half> addrspace(1)* %c) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x16f16(ptr addrspace(1) %arg, ptr addrspace(1) %c) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg - %c.1 = load <4 x half>, <4 x half> addrspace(1)* %c - %c2p = getelementptr <4 x half>, <4 x half> addrspace(1)* %c, i64 1 - %c.2 = load <4 x half>, <4 x half> addrspace(1)* %c2p + %in.1 = load <4 x float>, ptr addrspace(1) %arg + %c.1 = load <4 x half>, ptr addrspace(1) %c + %c2p = getelementptr <4 x half>, ptr addrspace(1) %c, i64 1 + %c.2 = load <4 x half>, ptr addrspace(1) %c2p %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x16f16(<4 x half> %c.1, <4 x half> %c.2, <4 x float> %in.1, i32 1, i32 2, i32 3) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -310,11 +310,11 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-8: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}], -define amdgpu_kernel void @test_mfma_i32_32x32x4i8(<32 x i32> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_i32_32x32x4i8(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <32 x i32>, <32 x i32> addrspace(1)* %arg + %in.1 = load <32 x i32>, ptr addrspace(1) %arg %mai.1 = tail call <32 x i32> @llvm.amdgcn.mfma.i32.32x32x4i8(i32 1, i32 2, <32 x i32> %in.1, i32 1, i32 2, i32 3) - store <32 x i32> %mai.1, <32 x i32> addrspace(1)* %arg + store <32 x i32> %mai.1, ptr addrspace(1) %arg ret void } @@ -330,11 +330,11 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-4: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}], -define amdgpu_kernel void @test_mfma_i32_16x16x4i8(<16 x i32> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_i32_16x16x4i8(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <16 x i32>, <16 x i32> addrspace(1)* %arg + %in.1 = load <16 x i32>, ptr addrspace(1) %arg %mai.1 = tail call <16 x i32> @llvm.amdgcn.mfma.i32.16x16x4i8(i32 1, i32 2, <16 x i32> %in.1, i32 1, i32 2, i32 3) - store <16 x i32> %mai.1, <16 x i32> addrspace(1)* %arg + store <16 x i32> %mai.1, ptr addrspace(1) %arg ret void } @@ -350,11 +350,11 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]], -define amdgpu_kernel void @test_mfma_i32_4x4x4i8(<4 x i32> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_i32_4x4x4i8(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <4 x i32>, <4 x i32> addrspace(1)* %arg + %in.1 = load <4 x i32>, ptr addrspace(1) %arg %mai.1 = tail call <4 x i32> @llvm.amdgcn.mfma.i32.4x4x4i8(i32 1, i32 2, <4 x i32> %in.1, i32 1, i32 2, i32 3) - store <4 x i32> %mai.1, <4 x i32> addrspace(1)* %arg + store <4 x i32> %mai.1, ptr addrspace(1) %arg ret void } @@ -363,12 +363,12 @@ bb: ; GFX908_A-NEXT: v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] ; GFX940: v_mfma_f32_32x32x1_2b_f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}] ; GFX940-NEXT: v_mfma_f32_32x32x1_2b_f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] -define amdgpu_kernel void @test_mfma_f32_32x32x1f32_forward_acc(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x1f32_forward_acc(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg + %in.1 = load <32 x float>, ptr addrspace(1) %arg %mai.1 = tail call <32 x float> 
@llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0) %mai.2 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %mai.1, i32 0, i32 0, i32 0) - store <32 x float> %mai.2, <32 x float> addrspace(1)* %arg + store <32 x float> %mai.2, ptr addrspace(1) %arg ret void } @@ -377,12 +377,12 @@ bb: ; GFX908_A-NEXT: v_mfma_f32_16x16x1f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] ; GFX940: v_mfma_f32_16x16x1_4b_f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}] ; GFX940-NEXT: v_mfma_f32_16x16x1_4b_f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] -define amdgpu_kernel void @test_mfma_f32_16x16x1f32_forward_acc(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x1f32_forward_acc(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg + %in.1 = load <16 x float>, ptr addrspace(1) %arg %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float 1.0, float 2.0, <16 x float> %in.1, i32 0, i32 0, i32 0) %mai.2 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float 1.0, float 2.0, <16 x float> %mai.1, i32 0, i32 0, i32 0) - store <16 x float> %mai.2, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.2, ptr addrspace(1) %arg ret void } @@ -392,12 +392,12 @@ bb: ; GFX940: v_mfma_f32_4x4x1_16b_f32 [[MAI1:a\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, v{{[0-9]+}}, a[{{[0-9]+:[0-9]+}}] ; GFX940-NEXT: s_nop 1 ; GFX940-NEXT: v_mfma_f32_4x4x1_16b_f32 a[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, v{{[0-9]+}}, [[MAI1]] -define amdgpu_kernel void @test_mfma_f32_4x4x1f32_forward_acc(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_4x4x1f32_forward_acc(ptr addrspace(1) %arg) #0 { bb: - %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg + %in.1 = load <4 x float>, ptr addrspace(1) %arg %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 2.0, <4 x float> %in.1, i32 0, i32 0, i32 0) %mai.2 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 2.0, <4 x float> %mai.1, i32 0, i32 0, i32 0) - store <4 x float> %mai.2, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.2, ptr addrspace(1) %arg ret void } @@ -416,10 +416,10 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]], -define amdgpu_kernel void @test_mfma_f32_4x4x1f32_imm_splat(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_4x4x1f32_imm_splat(ptr addrspace(1) %arg) #0 { bb: %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 2.0, <4 x float> , i32 0, i32 0, i32 0) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -435,10 +435,10 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-4: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}], -define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm_splat(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm_splat(ptr addrspace(1) %arg) #0 { bb: %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float 1.0, float 2.0, <16 x float> , i32 0, i32 0, i32 0) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -454,10 +454,10 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: 
v_accvgpr_read_b32 ; GFX90A-COUNT-4: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}], -define amdgpu_kernel void @test_mfma_f32_32x32x8f16_imm_splat(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x8f16_imm_splat(ptr addrspace(1) %arg) #0 { bb: %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x8f16(<4 x half> , <4 x half> , <16 x float> , i32 0, i32 0, i32 0) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -473,10 +473,10 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-8: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}], -define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm_splat(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm_splat(ptr addrspace(1) %arg) #0 { bb: %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> , i32 0, i32 0, i32 0) - store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg + store <32 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -493,10 +493,10 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]], -define amdgpu_kernel void @test_mfma_f32_4x4x1f32_imm(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_4x4x1f32_imm(ptr addrspace(1) %arg) #0 { bb: %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 2.0, <4 x float> , i32 0, i32 0, i32 0) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -511,10 +511,10 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-4: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}], -define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm(<16 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm(ptr addrspace(1) %arg) #0 { bb: %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float 1.0, float 2.0, <16 x float> , i32 0, i32 0, i32 0) - store <16 x float> %mai.1, <16 x float> addrspace(1)* %arg + store <16 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -587,10 +587,10 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A-COUNT-8: global_store_dwordx4 {{v[0-9]+}}, a[{{[0-9:]+}}], -define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm(ptr addrspace(1) %arg) #0 { bb: %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> , i32 0, i32 0, i32 0) - store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg + store <32 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -610,13 +610,13 @@ bb: ; GFX908: global_store_dwordx4 ; GFX90A-NOT: v_accvgpr_read_b32 ; GFX90A: global_store_dwordx4 {{v[0-9]+}}, [[RES]] -define amdgpu_kernel void @test_mfma_f32_4x4x1f32_lit_splat(<4 x float> addrspace(1)* %arg, i64 %idx) #0 { +define amdgpu_kernel void @test_mfma_f32_4x4x1f32_lit_splat(ptr addrspace(1) %arg, i64 %idx) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %arg, i32 %tid + %gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i32 %tid %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 2.0, <4 x float> , i32 0, i32 0, i32 0) - 
;store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg - store <4 x float> %mai.1, <4 x float> addrspace(1)* %gep + ;store <4 x float> %mai.1, ptr addrspace(1) %arg + store <4 x float> %mai.1, ptr addrspace(1) %gep ret void } @@ -634,13 +634,13 @@ bb: ; GFX908-COUNT-4: v_accvgpr_read_b32 ; GFX908: global_store_dwordx4 v{{[0-9]+}}, v[{{[0-9:]+}}], s[{{[0-9:]+}}] ; GFX90A_40: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}], s[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_4x4x1f32_lit_splat_bad_code(<4 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_4x4x1f32_lit_splat_bad_code(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds <4 x float>, <4 x float> addrspace(1)* %arg, i32 %tid + %gep = getelementptr inbounds <4 x float>, ptr addrspace(1) %arg, i32 %tid %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 2.0, <4 x float> , i32 0, i32 0, i32 0) - store <4 x float> %mai.1, <4 x float> addrspace(1)* %arg + store <4 x float> %mai.1, ptr addrspace(1) %arg ret void } @@ -659,13 +659,13 @@ bb: ; GFX908-COUNT-8: global_store_dwordx4 ; GFX90A_40-NOT: v_accvgpr_read_b32 ; GFX90A_40-COUNT-5: global_store_dwordx4 v{{[0-9:]+}}, a[{{[0-9:]+}}], s[{{[0-9:]+}}] -define amdgpu_kernel void @test_mfma_f32_32x32x1f32_vecarg(<32 x float> addrspace(1)* %arg) #0 { +define amdgpu_kernel void @test_mfma_f32_32x32x1f32_vecarg(ptr addrspace(1) %arg) #0 { bb: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %arg, i32 %tid - %in.1 = load <32 x float>, <32 x float> addrspace(1)* %gep + %gep = getelementptr inbounds <32 x float>, ptr addrspace(1) %arg, i32 %tid + %in.1 = load <32 x float>, ptr addrspace(1) %gep %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 1, i32 2, i32 3) - store <32 x float> %mai.1, <32 x float> addrspace(1)* %gep + store <32 x float> %mai.1, ptr addrspace(1) %gep ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll index 481161e68a669c..acb6398a57aa1e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll @@ -11,9 +11,9 @@ ; PREGFX10: s_nop 1 ; VI-OPT: v_mov_b32_dpp v0, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x02,0x00,0x7e,0x00,0x01,0x08,0x11] ; VI-NOOPT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1 ; encoding: [0xfa,0x02,0x00,0x7e,0x01,0x01,0x08,0x11] -define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in) { %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) #0 - store i32 %tmp0, i32 addrspace(1)* %out + store i32 %tmp0, ptr addrspace(1) %out ret void } @@ -26,10 +26,10 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in) { ; PREGFX10: s_nop 1 ; VI-OPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1 ; VI-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1 -define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @dpp_wait_states(ptr addrspace(1) %out, i32 %in) { %tmp0 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %in, i32 1, i32 1, i32 1, i1 1) #0 %tmp1 = call i32 
@llvm.amdgcn.mov.dpp.i32(i32 %tmp0, i32 1, i32 1, i32 1, i1 1) #0 - store i32 %tmp1, i32 addrspace(1)* %out + store i32 %tmp1, ptr addrspace(1) %out ret void } @@ -43,17 +43,17 @@ define amdgpu_kernel void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) { ; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1 ; PREGFX10: s_nop 1 ; VI: v_mov_b32_dpp v{{[0-9]+}}, [[VGPR1]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1 -define amdgpu_kernel void @dpp_first_in_bb(float addrspace(1)* %out, float addrspace(1)* %in, float %cond, float %a, float %b) { +define amdgpu_kernel void @dpp_first_in_bb(ptr addrspace(1) %out, ptr addrspace(1) %in, float %cond, float %a, float %b) { %cmp = fcmp oeq float %cond, 0.0 br i1 %cmp, label %if, label %else if: - %out_val = load float, float addrspace(1)* %out + %out_val = load float, ptr addrspace(1) %out %if_val = fadd float %a, %out_val br label %endif else: - %in_val = load float, float addrspace(1)* %in + %in_val = load float, ptr addrspace(1) %in %else_val = fadd float %b, %in_val br label %endif @@ -64,16 +64,16 @@ endif: %tmp1 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %tmp0, i32 1, i32 1, i32 1, i1 1) #0 %tmp2 = call i32 @llvm.amdgcn.mov.dpp.i32(i32 %tmp1, i32 1, i32 1, i32 1, i1 1) #0 %tmp_float = bitcast i32 %tmp2 to float - store float %tmp_float, float addrspace(1)* %out + store float %tmp_float, ptr addrspace(1) %out ret void } ; VI-LABEL: {{^}}mov_dpp64_test: ; VI: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; VI: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 -define amdgpu_kernel void @mov_dpp64_test(i64 addrspace(1)* %out, i64 %in1) { +define amdgpu_kernel void @mov_dpp64_test(ptr addrspace(1) %out, i64 %in1) { %tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 %in1, i32 1, i32 1, i32 1, i1 0) #0 - store i64 %tmp0, i64 addrspace(1)* %out + store i64 %tmp0, ptr addrspace(1) %out ret void } @@ -85,9 +85,9 @@ define amdgpu_kernel void @mov_dpp64_test(i64 addrspace(1)* %out, i64 %in1) { ; VI-OPT-DAG: v_mov_b32_dpp v[[OLD_LO]], v[[OLD_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; VI-OPT-DAG: v_mov_b32_dpp v[[OLD_HI]], v[[OLD_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 ; VI-NOOPT-COUNT-2: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 -define amdgpu_kernel void @mov_dpp64_imm_test(i64 addrspace(1)* %out) { +define amdgpu_kernel void @mov_dpp64_imm_test(ptr addrspace(1) %out) { %tmp0 = call i64 @llvm.amdgcn.mov.dpp.i64(i64 123451234512345, i32 1, i32 1, i32 1, i1 0) #0 - store i64 %tmp0, i64 addrspace(1)* %out + store i64 %tmp0, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll index b49d188df23945..fc94f95f3f4793 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp8.ll @@ -6,9 +6,9 @@ ; GFX10PLUS-LABEL: {{^}}dpp8_test: ; GFX10PLUS: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}} ; GFX10PLUS: v_mov_b32_dpp [[SRC]], [[SRC]] dpp8:[1,0,0,0,0,0,0,0]{{$}} -define amdgpu_kernel void @dpp8_test(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @dpp8_test(ptr addrspace(1) %out, i32 %in) { %tmp0 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %in, i32 1) #0 - store i32 %tmp0, i32 addrspace(1)* %out + store i32 %tmp0, ptr addrspace(1) %out ret void } @@ -17,10 +17,10 @@ define amdgpu_kernel void @dpp8_test(i32 
addrspace(1)* %out, i32 %in) {
 ; GFX10PLUS: v_mov_b32_e32 [[VGPR0:v[0-9]+]], s{{[0-9]+}}
 ; GFX10PLUS: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] dpp8:[1,0,0,0,0,0,0,0]{{$}}
 ; GFX10PLUS: v_mov_b32_dpp [[VGPR0]], [[VGPR0]] dpp8:[5,0,0,0,0,0,0,0]{{$}}
-define amdgpu_kernel void @dpp8_wait_states(i32 addrspace(1)* %out, i32 %in) {
+define amdgpu_kernel void @dpp8_wait_states(ptr addrspace(1) %out, i32 %in) {
   %tmp0 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %in, i32 1) #0
   %tmp1 = call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %tmp0, i32 5) #0
-  store i32 %tmp1, i32 addrspace(1)* %out
+  store i32 %tmp1, ptr addrspace(1) %out
   ret void
 }

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
index 83bc8b2347245b..5bf6c9362bed8a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.pk.u16.u8.ll
@@ -7,11 +7,11 @@ declare i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64, i32, i64) #0
 ; GCN: v_mqsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
 ; GCN-DAG: v_mov_b32_e32 v5, v1
 ; GCN-DAG: v_mov_b32_e32 v4, v0
-define amdgpu_kernel void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
+define amdgpu_kernel void @v_mqsad_pk_u16_u8(ptr addrspace(1) %out, i64 %src) {
   %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0
   %tmp1 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0
   %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0
-  store i64 %tmp2, i64 addrspace(1)* %out, align 4
+  store i64 %tmp2, ptr addrspace(1) %out, align 4
   ret void
 }

@@ -19,13 +19,13 @@ define amdgpu_kernel void @v_mqsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
 ; GCN: v_mqsad_pk_u16_u8 v[0:1], v[2:3], v4, v[6:7]
 ; GCN-DAG: v_mov_b32_e32 v3, v1
 ; GCN-DAG: v_mov_b32_e32 v2, v0
-define amdgpu_kernel void @v_mqsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) {
+define amdgpu_kernel void @v_mqsad_pk_u16_u8_non_immediate(ptr addrspace(1) %out, i64 %src, i32 %a, i64 %b) {
   %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
   %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
   %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0
   %tmp3 = call i64 @llvm.amdgcn.mqsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0
   %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0
-  store i64 %tmp4, i64 addrspace(1)* %out, align 4
+  store i64 %tmp4, ptr addrspace(1) %out, align 4
   ret void
 }

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
index 685b5e0f29c423..488866e25676f1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mqsad.u32.u8.ll
@@ -7,12 +7,12 @@ declare <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64, i32, <4 x i32>) #0
 ; GCN-DAG: v_mov_b32_e32 v0, v2
 ; GCN-DAG: v_mov_b32_e32 v1, v3
 ; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}]
-define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) {
+define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(ptr addrspace(1) %out, i64 %src, i32 %a) {
   %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
   %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
   %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> ) #0
   %tmp3 = call <4 x i32> asm ";; force constraint",
"=v,{v[2:5]}"(<4 x i32> %tmp2) #0 - store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 + store <4 x i32> %tmp3, ptr addrspace(1) %out, align 4 ret void } @@ -20,12 +20,12 @@ define amdgpu_kernel void @v_mqsad_u32_u8_inline_integer_immediate(<4 x i32> add ; GCN-DAG: v_mov_b32_e32 v0, v2 ; GCN-DAG: v_mov_b32_e32 v1, v3 ; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @v_mqsad_u32_u8_non_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> %b) { +define amdgpu_kernel void @v_mqsad_u32_u8_non_immediate(ptr addrspace(1) %out, i64 %src, i32 %a, <4 x i32> %b) { %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %b) #0 %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 - store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 + store <4 x i32> %tmp3, ptr addrspace(1) %out, align 4 ret void } @@ -33,12 +33,12 @@ define amdgpu_kernel void @v_mqsad_u32_u8_non_immediate(<4 x i32> addrspace(1)* ; GCN-DAG: v_mov_b32_e32 v0, v2 ; GCN-DAG: v_mov_b32_e32 v1, v3 ; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @v_mqsad_u32_u8_inline_fp_immediate(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a) { +define amdgpu_kernel void @v_mqsad_u32_u8_inline_fp_immediate(ptr addrspace(1) %out, i64 %src, i32 %a) { %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> ) #0 %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 - store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 + store <4 x i32> %tmp3, ptr addrspace(1) %out, align 4 ret void } @@ -46,13 +46,13 @@ define amdgpu_kernel void @v_mqsad_u32_u8_inline_fp_immediate(<4 x i32> addrspac ; GCN-DAG: v_mov_b32_e32 v0, v2 ; GCN-DAG: v_mov_b32_e32 v1, v3 ; GCN: v_mqsad_u32_u8 v[2:5], v[0:1], v6, v[{{[0-9]+:[0-9]+}}] -define amdgpu_kernel void @v_mqsad_u32_u8_use_sgpr_vgpr(<4 x i32> addrspace(1)* %out, i64 %src, i32 %a, <4 x i32> addrspace(1)* %input) { - %in = load <4 x i32>, <4 x i32> addrspace(1) * %input +define amdgpu_kernel void @v_mqsad_u32_u8_use_sgpr_vgpr(ptr addrspace(1) %out, i64 %src, i32 %a, ptr addrspace(1) %input) { + %in = load <4 x i32>, ptr addrspace(1) %input %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0 %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0 %tmp2 = call <4 x i32> @llvm.amdgcn.mqsad.u32.u8(i64 %tmp, i32 %tmp1, <4 x i32> %in) #0 %tmp3 = call <4 x i32> asm ";; force constraint", "=v,{v[2:5]}"(<4 x i32> %tmp2) #0 - store <4 x i32> %tmp3, <4 x i32> addrspace(1)* %out, align 4 + store <4 x i32> %tmp3, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll index 2eb317900b5cef..ee9d8662da1880 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.msad.u8.ll @@ -5,17 +5,17 @@ declare i32 @llvm.amdgcn.msad.u8(i32, i32, i32) #0 ; GCN-LABEL: {{^}}v_msad_u8: ; GCN: v_msad_u8 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -define amdgpu_kernel void @v_msad_u8(i32 addrspace(1)* %out, i32 %src) { +define amdgpu_kernel void @v_msad_u8(ptr addrspace(1) %out, i32 %src) { 
   %result= call i32 @llvm.amdgcn.msad.u8(i32 %src, i32 100, i32 100) #0
-  store i32 %result, i32 addrspace(1)* %out, align 4
+  store i32 %result, ptr addrspace(1) %out, align 4
   ret void
 }

 ; GCN-LABEL: {{^}}v_msad_u8_non_immediate:
 ; GCN: v_msad_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @v_msad_u8_non_immediate(i32 addrspace(1)* %out, i32 %src, i32 %a, i32 %b) {
+define amdgpu_kernel void @v_msad_u8_non_immediate(ptr addrspace(1) %out, i32 %src, i32 %a, i32 %b) {
   %result= call i32 @llvm.amdgcn.msad.u8(i32 %src, i32 %a, i32 %b) #0
-  store i32 %result, i32 addrspace(1)* %out, align 4
+  store i32 %result, ptr addrspace(1) %out, align 4
   ret void
 }

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll
index a1dbe9a1322e82..b58e5714cd1483 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.i24.ll
@@ -2,9 +2,9 @@

 ; GCN-LABEL: {{^}}test_mul_i24:
 ; GCN: v_mul_i32_i24
-define amdgpu_kernel void @test_mul_i24(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #1 {
+define amdgpu_kernel void @test_mul_i24(ptr addrspace(1) %out, i32 %src1, i32 %src2) #1 {
   %val = call i32 @llvm.amdgcn.mul.i24(i32 %src1, i32 %src2) #0
-  store i32 %val, i32 addrspace(1)* %out
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll
index 810b50337e2b93..58efab855eb32e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mul.u24.ll
@@ -2,9 +2,9 @@

 ; GCN-LABEL: {{^}}test_mul_u24:
 ; GCN: v_mul_u32_u24
-define amdgpu_kernel void @test_mul_u24(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #1 {
+define amdgpu_kernel void @test_mul_u24(ptr addrspace(1) %out, i32 %src1, i32 %src2) #1 {
   %val = call i32 @llvm.amdgcn.mul.u24(i32 %src1, i32 %src2) #0
-  store i32 %val, i32 addrspace(1)* %out
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
index 4d9ba39b8e0b85..bb3bf2be364d2c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
@@ -5,41 +5,41 @@ declare i32 @llvm.amdgcn.perm(i32, i32, i32) #0

 ; GCN-LABEL: {{^}}v_perm_b32_v_v_v:
 ; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, v2
-define amdgpu_ps void @v_perm_b32_v_v_v(i32 %src1, i32 %src2, i32 %src3, i32 addrspace(1)* %out) #1 {
+define amdgpu_ps void @v_perm_b32_v_v_v(i32 %src1, i32 %src2, i32 %src3, ptr addrspace(1) %out) #1 {
   %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 %src3) #0
-  store i32 %val, i32 addrspace(1)* %out
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }

 ; GCN-LABEL: {{^}}v_perm_b32_v_v_c:
 ; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, {{[vs][0-9]+}}
-define amdgpu_ps void @v_perm_b32_v_v_c(i32 %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
+define amdgpu_ps void @v_perm_b32_v_v_c(i32 %src1, i32 %src2, ptr addrspace(1) %out) #1 {
   %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
-  store i32 %val, i32 addrspace(1)* %out
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }

 ; GCN-LABEL: {{^}}v_perm_b32_s_v_c:
 ; GCN: v_perm_b32 v{{[0-9]+}}, s0, v0, v{{[0-9]+}}
-define amdgpu_ps void @v_perm_b32_s_v_c(i32 inreg %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
+define amdgpu_ps void @v_perm_b32_s_v_c(i32 inreg %src1, i32 %src2, ptr addrspace(1) %out) #1 {
   %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32
%src2, i32 12345) #0 - store i32 %val, i32 addrspace(1)* %out + store i32 %val, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_perm_b32_s_s_c: ; GCN: v_perm_b32 v{{[0-9]+}}, s0, v{{[0-9]+}}, v{{[0-9]+}} -define amdgpu_ps void @v_perm_b32_s_s_c(i32 inreg %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 { +define amdgpu_ps void @v_perm_b32_s_s_c(i32 inreg %src1, i32 inreg %src2, ptr addrspace(1) %out) #1 { %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0 - store i32 %val, i32 addrspace(1)* %out + store i32 %val, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_perm_b32_v_s_i: ; GCN: v_perm_b32 v{{[0-9]+}}, v0, s0, 1 -define amdgpu_ps void @v_perm_b32_v_s_i(i32 %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 { +define amdgpu_ps void @v_perm_b32_v_s_i(i32 %src1, i32 inreg %src2, ptr addrspace(1) %out) #1 { %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 1) #0 - store i32 %val, i32 addrspace(1)* %out + store i32 %val, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll index ca48ce8a08c4ab..c3c0d6c58bbcf8 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane.ll @@ -11,18 +11,18 @@ declare i32 @llvm.amdgcn.workitem.id.y() ; GCN-LABEL: {{^}}v_permlane16_b32_vss: ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}{{$}} -define amdgpu_kernel void @v_permlane16_b32_vss(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlane16_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlane16_b32_vii: ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2{{$}} -define amdgpu_kernel void @v_permlane16_b32_vii(i32 addrspace(1)* %out, i32 %src0) #1 { +define amdgpu_kernel void @v_permlane16_b32_vii(ptr addrspace(1) %out, i32 %src0) #1 { %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 1, i32 2, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } @@ -31,9 +31,9 @@ define amdgpu_kernel void @v_permlane16_b32_vii(i32 addrspace(1)* %out, i32 %src ; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234 ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}} -define amdgpu_kernel void @v_permlane16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 { +define amdgpu_kernel void @v_permlane16_b32_vll(ptr addrspace(1) %out, i32 %src0) #1 { %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } @@ -45,11 +45,11 @@ define amdgpu_kernel void @v_permlane16_b32_vll(i32 addrspace(1)* %out, i32 %src ; GFX11-DAG: v_readfirstlane_b32 [[SRC1:s[0-9]+]], [[VSRC1]] ; GFX11-DAG: v_readfirstlane_b32 [[SRC2:s[0-9]+]], [[VSRC2]] ; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}} -define amdgpu_kernel void @v_permlane16_b32_vvv(i32 addrspace(1)* %out, i32 %src0) #1 { +define amdgpu_kernel void @v_permlane16_b32_vvv(ptr addrspace(1) %out, i32 %src0) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %tidy = 
call i32 @llvm.amdgcn.workitem.id.y() %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %tidx, i32 %tidy, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } @@ -58,10 +58,10 @@ define amdgpu_kernel void @v_permlane16_b32_vvv(i32 addrspace(1)* %out, i32 %src ; GFX10PLUS: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0 ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], s{{[0-9]+}}{{$}} -define amdgpu_kernel void @v_permlane16_b32_vvs(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #1 { +define amdgpu_kernel void @v_permlane16_b32_vvs(ptr addrspace(1) %out, i32 %src0, i32 %src2) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %tidx, i32 %src2, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } @@ -70,55 +70,55 @@ define amdgpu_kernel void @v_permlane16_b32_vvs(i32 addrspace(1)* %out, i32 %src ; GFX10PLUS: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v{{[0-9]+}} ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, [[SRC2]]{{$}} -define amdgpu_kernel void @v_permlane16_b32_vsv(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 { +define amdgpu_kernel void @v_permlane16_b32_vsv(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 { %tidy = call i32 @llvm.amdgcn.workitem.id.y() %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %tidy, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlane16_b32_vss_fi: ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}} -define amdgpu_kernel void @v_permlane16_b32_vss_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlane16_b32_vss_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlane16_b32_vss_bc: ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}} -define amdgpu_kernel void @v_permlane16_b32_vss_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlane16_b32_vss_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 1) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlane16_b32_vss_fi_bc: ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}} -define amdgpu_kernel void @v_permlane16_b32_vss_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlane16_b32_vss_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 1) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlanex16_b32_vss: ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlanex16_b32 
v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}{{$}} -define amdgpu_kernel void @v_permlanex16_b32_vss(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlanex16_b32_vss(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlanex16_b32_vii: ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2{{$}} -define amdgpu_kernel void @v_permlanex16_b32_vii(i32 addrspace(1)* %out, i32 %src0) #1 { +define amdgpu_kernel void @v_permlanex16_b32_vii(ptr addrspace(1) %out, i32 %src0) #1 { %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 1, i32 2, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } @@ -127,9 +127,9 @@ define amdgpu_kernel void @v_permlanex16_b32_vii(i32 addrspace(1)* %out, i32 %sr ; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234 ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], 0xc1d1{{$}} -define amdgpu_kernel void @v_permlanex16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 { +define amdgpu_kernel void @v_permlanex16_b32_vll(ptr addrspace(1) %out, i32 %src0) #1 { %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } @@ -141,11 +141,11 @@ define amdgpu_kernel void @v_permlanex16_b32_vll(i32 addrspace(1)* %out, i32 %sr ; GFX11-DAG: v_readfirstlane_b32 [[SRC1:s[0-9]+]], [[VSRC1]] ; GFX11-DAG: v_readfirstlane_b32 [[SRC2:s[0-9]+]], [[VSRC2]] ; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}} -define amdgpu_kernel void @v_permlanex16_b32_vvv(i32 addrspace(1)* %out, i32 %src0) #1 { +define amdgpu_kernel void @v_permlanex16_b32_vvv(ptr addrspace(1) %out, i32 %src0) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %tidy = call i32 @llvm.amdgcn.workitem.id.y() %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %tidx, i32 %tidy, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } @@ -154,10 +154,10 @@ define amdgpu_kernel void @v_permlanex16_b32_vvv(i32 addrspace(1)* %out, i32 %sr ; GFX10PLUS: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0 ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], s{{[0-9]+}}{{$}} -define amdgpu_kernel void @v_permlanex16_b32_vvs(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #1 { +define amdgpu_kernel void @v_permlanex16_b32_vvs(ptr addrspace(1) %out, i32 %src0, i32 %src2) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %tidx, i32 %src2, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } @@ -166,153 +166,153 @@ define amdgpu_kernel void @v_permlanex16_b32_vvs(i32 addrspace(1)* %out, i32 %sr ; GFX10PLUS: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v{{[0-9]+}} ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, [[SRC2]]{{$}} -define amdgpu_kernel void @v_permlanex16_b32_vsv(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 { +define amdgpu_kernel void @v_permlanex16_b32_vsv(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 { 
%tidy = call i32 @llvm.amdgcn.workitem.id.y() %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %tidy, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlanex16_b32_vss_fi: ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}} -define amdgpu_kernel void @v_permlanex16_b32_vss_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlanex16_b32_vss_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlanex16_b32_vss_bc: ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}} -define amdgpu_kernel void @v_permlanex16_b32_vss_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlanex16_b32_vss_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 1) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlanex16_b32_vss_fi_bc: ; GFX10PLUS-NOT: v_readfirstlane_b32 ; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}} -define amdgpu_kernel void @v_permlanex16_b32_vss_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlanex16_b32_vss_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 1) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlane16_b32_tid_tid: ; GFX10PLUS: v_permlane16_b32 v0, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}} -define amdgpu_kernel void @v_permlane16_b32_tid_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlane16_b32_tid_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %v = call i32 @llvm.amdgcn.permlane16(i32 %tidx, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlane16_b32_undef_tid: ; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}} -define amdgpu_kernel void @v_permlane16_b32_undef_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlane16_b32_undef_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %v = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlane16_b32_i_tid: ; GFX10PLUS: v_{{(dual_)?}}mov_b32{{(_e32)?}} [[OLD:v[0-9]+]], 0x3039 ; GFX10PLUS: v_permlane16_b32 [[OLD]], v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}} -define amdgpu_kernel void @v_permlane16_b32_i_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void 
@v_permlane16_b32_i_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlane16_b32_i_tid_fi: ; GFX10PLUS-NOT: 0x3039 ; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}} -define amdgpu_kernel void @v_permlane16_b32_i_tid_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlane16_b32_i_tid_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlane16_b32_i_tid_bc: ; GFX10PLUS-NOT: 0x3039 ; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}} -define amdgpu_kernel void @v_permlane16_b32_i_tid_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlane16_b32_i_tid_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 1) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlane16_b32_i_tid_fi_bc: ; GFX10PLUS-NOT: 0x3039 ; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}} -define amdgpu_kernel void @v_permlane16_b32_i_tid_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlane16_b32_i_tid_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 1) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlanex16_b32_tid_tid: ; GFX10PLUS: v_permlanex16_b32 v0, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}} -define amdgpu_kernel void @v_permlanex16_b32_tid_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlanex16_b32_tid_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %v = call i32 @llvm.amdgcn.permlanex16(i32 %tidx, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlanex16_b32_undef_tid: ; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}} -define amdgpu_kernel void @v_permlanex16_b32_undef_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlanex16_b32_undef_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %v = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid: ; GFX10PLUS: v_{{(dual_)?}}mov_b32{{(_e32)?}} [[OLD:v[0-9]+]], 0x3039 ; GFX10PLUS: v_permlanex16_b32 [[OLD]], v0, s{{[0-9]+}}, 
s{{[0-9]+}}{{$}} -define amdgpu_kernel void @v_permlanex16_b32_i_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlanex16_b32_i_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid_fi: ; GFX10PLUS-NOT: 0x3039 ; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}} -define amdgpu_kernel void @v_permlanex16_b32_i_tid_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlanex16_b32_i_tid_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid_bc: ; GFX10PLUS-NOT: 0x3039 ; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}} -define amdgpu_kernel void @v_permlanex16_b32_i_tid_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlanex16_b32_i_tid_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 1) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } ; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid_fi_bc: ; GFX10PLUS-NOT: 0x3039 ; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}} -define amdgpu_kernel void @v_permlanex16_b32_i_tid_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 { +define amdgpu_kernel void @v_permlanex16_b32_i_tid_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #1 { %tidx = call i32 @llvm.amdgcn.workitem.id.x() %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 1) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll index 05b535a04f3e92..3e80947f0e578e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.permlane64.ll @@ -5,7 +5,7 @@ declare i32 @llvm.amdgcn.permlane64(i32) declare i32 @llvm.amdgcn.workitem.id.x() -define amdgpu_kernel void @test_s(i32 addrspace(1)* %out, i32 %src0) { +define amdgpu_kernel void @test_s(ptr addrspace(1) %out, i32 %src0) { ; GFX11-LABEL: test_s: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_clause 0x1 @@ -19,11 +19,11 @@ define amdgpu_kernel void @test_s(i32 addrspace(1)* %out, i32 %src0) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %v = call i32 @llvm.amdgcn.permlane64(i32 %src0) - store i32 %v, i32 addrspace(1)* %out + store i32 %v, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @test_i(i32 addrspace(1)* %out) { +define amdgpu_kernel void @test_i(ptr addrspace(1) %out) { ; GFX11-LABEL: test_i: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -35,11 +35,11 @@ define amdgpu_kernel void @test_i(i32 addrspace(1)* 
%out) {
 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
 ; GFX11-NEXT: s_endpgm
   %v = call i32 @llvm.amdgcn.permlane64(i32 99)
-  store i32 %v, i32 addrspace(1)* %out
+  store i32 %v, ptr addrspace(1) %out
   ret void
 }

-define amdgpu_kernel void @test_v(i32 addrspace(1)* %out, i32 %src0) #1 {
+define amdgpu_kernel void @test_v(ptr addrspace(1) %out, i32 %src0) #1 {
 ; GFX11-SDAG-LABEL: test_v:
 ; GFX11-SDAG: ; %bb.0:
 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
@@ -61,6 +61,6 @@ define amdgpu_kernel void @test_v(i32 addrspace(1)* %out, i32 %src0) #1 {
 ; GFX11-GISEL-NEXT: s_endpgm
   %tidx = call i32 @llvm.amdgcn.workitem.id.x()
   %v = call i32 @llvm.amdgcn.permlane64(i32 %tidx)
-  store i32 %v, i32 addrspace(1)* %out
+  store i32 %v, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
index 1f46613a8db0d3..634af67662fa37 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.qsad.pk.u16.u8.ll
@@ -7,11 +7,11 @@ declare i64 @llvm.amdgcn.qsad.pk.u16.u8(i64, i32, i64) #0
 ; GCN: v_qsad_pk_u16_u8 v[0:1], v[4:5], s{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
 ; GCN-DAG: v_mov_b32_e32 v5, v1
 ; GCN-DAG: v_mov_b32_e32 v4, v0
-define amdgpu_kernel void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
+define amdgpu_kernel void @v_qsad_pk_u16_u8(ptr addrspace(1) %out, i64 %src) {
   %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[4:5]},v"(i64 %src) #0
   %tmp1 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 100, i64 100) #0
   %tmp2 = call i64 asm ";; force constraint", "=v,{v[4:5]}"(i64 %tmp1) #0
-  store i64 %tmp2, i64 addrspace(1)* %out, align 4
+  store i64 %tmp2, ptr addrspace(1) %out, align 4
   ret void
 }

@@ -19,13 +19,13 @@ define amdgpu_kernel void @v_qsad_pk_u16_u8(i64 addrspace(1)* %out, i64 %src) {
 ; GCN: v_qsad_pk_u16_u8 v[0:1], v[2:3], v4, v[6:7]
 ; GCN-DAG: v_mov_b32_e32 v3, v1
 ; GCN-DAG: v_mov_b32_e32 v2, v0
-define amdgpu_kernel void @v_qsad_pk_u16_u8_non_immediate(i64 addrspace(1)* %out, i64 %src, i32 %a, i64 %b) {
+define amdgpu_kernel void @v_qsad_pk_u16_u8_non_immediate(ptr addrspace(1) %out, i64 %src, i32 %a, i64 %b) {
   %tmp = call i64 asm "v_lsrlrev_b64 $0, $1, 1", "={v[2:3]},v"(i64 %src) #0
   %tmp1 = call i32 asm "v_mov_b32 $0, $1", "={v4},v"(i32 %a) #0
   %tmp2 = call i64 asm "v_lshlrev_b64 $0, $1, 1", "={v[6:7]},v"(i64 %b) #0
   %tmp3 = call i64 @llvm.amdgcn.qsad.pk.u16.u8(i64 %tmp, i32 %tmp1, i64 %tmp2) #0
   %tmp4 = call i64 asm ";; force constraint", "=v,{v[2:3]}"(i64 %tmp3) #0
-  store i64 %tmp4, i64 addrspace(1)* %out, align 4
+  store i64 %tmp4, ptr addrspace(1) %out, align 4
   ret void
 }

diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll
index c7d1bbb3b47957..312ac2142101a4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.queue.ptr.ll
@@ -6,14 +6,13 @@
 ; GCN-LABEL: {{^}}test:
 ; GCN: enable_sgpr_queue_ptr = 1
 ; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
-define amdgpu_kernel void @test(i32 addrspace(1)* %out) {
-  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
-  %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
-  %value = load i32, i32 addrspace(4)* %header_ptr
-  store i32 %value, i32 addrspace(1)* %out
+define amdgpu_kernel void @test(ptr addrspace(1) %out) {
+  %queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
+  %value = load i32, ptr addrspace(4) %queue_ptr
+  store i32 %value, ptr addrspace(1) %out
   ret void
 }

-declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
+declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0

 attributes #0 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.lds.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.lds.ll
index 6dfec8cb7fa33e..48ec7cfea52c72 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.lds.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.lds.ll
@@ -2,9 +2,9 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN

-declare void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* nocapture, i32 %size, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)
+declare void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux)

-define amdgpu_ps float @buffer_load_lds_dword(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds) {
+define amdgpu_ps float @buffer_load_lds_dword(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) {
 ; GCN-LABEL: buffer_load_lds_dword:
 ; GCN: ; %bb.0: ; %main_body
 ; GCN-NEXT: s_mov_b32 m0, s4
@@ -18,15 +18,14 @@ define amdgpu_ps float @buffer_load_lds_dword(<4 x i32> inreg %rsrc, i8 addrspac
 ; GCN-NEXT: s_waitcnt lgkmcnt(0)
 ; GCN-NEXT: ; return to shader part epilog
 main_body:
-  call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 0, i32 0, i32 0, i32 0)
-  call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 0, i32 0, i32 4, i32 1)
-  call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 0, i32 0, i32 8, i32 2)
-  %ptr = bitcast i8 addrspace(3)* %lds to float addrspace(3)*
-  %res = load float, float addrspace(3)* %ptr
+  call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 4, i32 1)
+  call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 0, i32 8, i32 2)
+  %res = load float, ptr addrspace(3) %lds
   ret float %res
 }

-define amdgpu_ps void @buffer_load_lds_dword_imm_voffset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds) {
+define amdgpu_ps void @buffer_load_lds_dword_imm_voffset(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) {
 ; GCN-LABEL: buffer_load_lds_dword_imm_voffset:
 ; GCN: ; %bb.0: ; %main_body
 ; GCN-NEXT: v_mov_b32_e32 v0, 0x800
@@ -35,11 +34,11 @@ define amdgpu_ps void @buffer_load_lds_dword_imm_voffset(<4 x i32> inreg %rsrc,
 ; GCN-NEXT: buffer_load_dword v0, s[0:3], 0 offen lds
 ; GCN-NEXT: s_endpgm
 main_body:
-  call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 2048, i32 0, i32 0, i32 0)
+  call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 2048, i32 0, i32 0, i32 0)
   ret void
 }

-define amdgpu_ps void @buffer_load_lds_dword_v_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %voffset) {
+define amdgpu_ps void @buffer_load_lds_dword_v_offset(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset) {
 ; GCN-LABEL: buffer_load_lds_dword_v_offset:
 ; GCN: ; %bb.0: ; %main_body
 ; GCN-NEXT:
s_mov_b32 m0, s4
@@ -47,11 +46,11 @@ define amdgpu_ps void @buffer_load_lds_dword_v_offset(<4 x i32> inreg %rsrc, i8
; GCN-NEXT: buffer_load_dword v0, s[0:3], 0 offen lds
; GCN-NEXT: s_endpgm
main_body:
- call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %voffset, i32 0, i32 0, i32 0)
+ call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 %voffset, i32 0, i32 0, i32 0)
ret void
}

-define amdgpu_ps void @buffer_load_lds_dword_s_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 inreg %soffset) {
+define amdgpu_ps void @buffer_load_lds_dword_s_offset(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds, i32 inreg %soffset) {
; GCN-LABEL: buffer_load_lds_dword_s_offset:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 m0, s4
@@ -59,11 +58,11 @@ define amdgpu_ps void @buffer_load_lds_dword_s_offset(<4 x i32> inreg %rsrc, i8
; GCN-NEXT: buffer_load_dword off, s[0:3], s5 lds
; GCN-NEXT: s_endpgm
main_body:
- call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 0, i32 %soffset, i32 0, i32 0)
+ call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 0, i32 %soffset, i32 0, i32 0)
ret void
}

-define amdgpu_ps void @buffer_load_lds_dword_vs_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %voffset, i32 inreg %soffset) {
+define amdgpu_ps void @buffer_load_lds_dword_vs_offset(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
; GCN-LABEL: buffer_load_lds_dword_vs_offset:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 m0, s4
@@ -71,11 +70,11 @@ define amdgpu_ps void @buffer_load_lds_dword_vs_offset(<4 x i32> inreg %rsrc, i8
; GCN-NEXT: buffer_load_dword v0, s[0:3], s5 offen lds
; GCN-NEXT: s_endpgm
main_body:
- call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %voffset, i32 %soffset, i32 0, i32 0)
+ call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 %voffset, i32 %soffset, i32 0, i32 0)
ret void
}

-define amdgpu_ps void @buffer_load_lds_dword_vs_imm_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %voffset, i32 inreg %soffset) {
+define amdgpu_ps void @buffer_load_lds_dword_vs_imm_offset(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %voffset, i32 inreg %soffset) {
; GCN-LABEL: buffer_load_lds_dword_vs_imm_offset:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 m0, s4
@@ -83,11 +82,11 @@ define amdgpu_ps void @buffer_load_lds_dword_vs_imm_offset(<4 x i32> inreg %rsrc
; GCN-NEXT: buffer_load_dword v0, s[0:3], s5 offen offset:2048 lds
; GCN-NEXT: s_endpgm
main_body:
- call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %voffset, i32 %soffset, i32 2048, i32 0)
+ call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 %voffset, i32 %soffset, i32 2048, i32 0)
ret void
}

-define amdgpu_ps void @buffer_load_lds_ushort(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds) {
+define amdgpu_ps void @buffer_load_lds_ushort(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GCN-LABEL: buffer_load_lds_ushort:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: v_mov_b32_e32 v0, 0x800
@@ -96,11 +95,11 @@ define amdgpu_ps void @buffer_load_lds_ushort(<4 x i32> inreg %rsrc, i8 addrspac
; GCN-NEXT: buffer_load_ushort v0, s[0:3], 0 offen lds
; GCN-NEXT: s_endpgm
main_body:
- call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 2, i32 2048, i32 0, i32 0, i32 0)
+ call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 2, i32 2048, i32 0, i32 0, i32 0)
ret void
}

-define amdgpu_ps void @buffer_load_lds_ubyte(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds) {
+define amdgpu_ps void @buffer_load_lds_ubyte(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) {
; GCN-LABEL: buffer_load_lds_ubyte:
; GCN: ; %bb.0: ; %main_body
; GCN-NEXT: s_mov_b32 m0, s4
@@ -108,6 +107,6 @@ define amdgpu_ps void @buffer_load_lds_ubyte(<4 x i32> inreg %rsrc, i8 addrspace
; GCN-NEXT: buffer_load_ubyte off, s[0:3], 0 offset:2048 lds
; GCN-NEXT: s_endpgm
main_body:
- call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 1, i32 0, i32 0, i32 2048, i32 0)
+ call void @llvm.amdgcn.raw.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 1, i32 0, i32 0, i32 2048, i32 0)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
index 51816da84d1d73..50b894ced52619 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
@@ -110,12 +110,12 @@ main_body:
; CHECK-LABEL: buffer_load_mmo:
; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
-define amdgpu_ps float @buffer_load_mmo(<4 x i32> inreg %rsrc, float addrspace(3)* %lds) {
+define amdgpu_ps float @buffer_load_mmo(<4 x i32> inreg %rsrc, ptr addrspace(3) %lds) {
entry:
- store float 0.0, float addrspace(3)* %lds
+ store float 0.0, ptr addrspace(3) %lds
%val = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
- %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
- store float 0.0, float addrspace(3)* %tmp2
+ %tmp2 = getelementptr float, ptr addrspace(3) %lds, i32 4
+ store float 0.0, ptr addrspace(3) %tmp2
ret float %val
}

@@ -350,10 +350,10 @@ main_body:
;CHECK-NEXT: buffer_load_{{ushort|u16}} [[VAL:v[0-9]+]], off, s[0:3], 0
;CHECK: s_waitcnt vmcnt(0)
;CHECK: ds_{{write|store}}_b16 v0, [[VAL]]
-define amdgpu_ps void @raw_buffer_load_f16(<4 x i32> inreg %rsrc, half addrspace(3)* %ptr) {
+define amdgpu_ps void @raw_buffer_load_f16(<4 x i32> inreg %rsrc, ptr addrspace(3) %ptr) {
main_body:
%val = call half @llvm.amdgcn.raw.buffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
- store half %val, half addrspace(3)* %ptr
+ store half %val, ptr addrspace(3) %ptr
ret void
}

@@ -362,10 +362,10 @@ main_body:
;CHECK-NEXT: buffer_load_{{dword|b32}} [[VAL:v[0-9]+]], off, s[0:3], 0
;CHECK: s_waitcnt vmcnt(0)
;CHECK: ds_{{write|store}}_b32 v0, [[VAL]]
-define amdgpu_ps void @raw_buffer_load_v2f16(<4 x i32> inreg %rsrc, <2 x half> addrspace(3)* %ptr) {
+define amdgpu_ps void @raw_buffer_load_v2f16(<4 x i32> inreg %rsrc, ptr addrspace(3) %ptr) {
main_body:
%val = call <2 x half> @llvm.amdgcn.raw.buffer.load.v2f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
- store <2 x half> %val, <2 x half> addrspace(3)* %ptr
+ store <2 x half> %val, ptr addrspace(3) %ptr
ret void
}

@@ -374,10 +374,10 @@ main_body:
;CHECK-NEXT: buffer_load_{{dwordx2|b64}} [[VAL:v\[[0-9]+:[0-9]+\]]], off, s[0:3], 0
;CHECK: s_waitcnt vmcnt(0)
;CHECK: ds_{{write|store}}_b64 v0, [[VAL]]
-define amdgpu_ps void @raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, <4 x half> addrspace(3)* %ptr) {
+define amdgpu_ps void @raw_buffer_load_v4f16(<4 x i32> inreg %rsrc, ptr addrspace(3) %ptr) {
main_body:
%val = call <4 x half> @llvm.amdgcn.raw.buffer.load.v4f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
- store <4 x half> %val, <4 x half> addrspace(3)* %ptr
+ store <4 x half> %val, ptr addrspace(3) %ptr
ret void
}

@@ -386,10 +386,10 @@ main_body:
;CHECK-NEXT: buffer_load_{{dword|b32}} [[VAL:v[0-9]+]], off, s[0:3], 0
;CHECK: s_waitcnt vmcnt(0)
;CHECK: ds_{{write|store}}_b32 v0, [[VAL]]
-define amdgpu_ps void @raw_buffer_load_v2i16(<4 x i32> inreg %rsrc, <2 x i16> addrspace(3)* %ptr) {
+define amdgpu_ps void @raw_buffer_load_v2i16(<4 x i32> inreg %rsrc, ptr addrspace(3) %ptr) {
main_body:
%val = call <2 x i16> @llvm.amdgcn.raw.buffer.load.v2i16(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
- store <2 x i16> %val, <2 x i16> addrspace(3)* %ptr
+ store <2 x i16> %val, ptr addrspace(3) %ptr
ret void
}

@@ -398,10 +398,10 @@ main_body:
;CHECK-NEXT: buffer_load_{{dwordx2|b64}} [[VAL:v\[[0-9]+:[0-9]+\]]], off, s[0:3], 0
;CHECK: s_waitcnt vmcnt(0)
;CHECK: ds_{{write|store}}_b64 v0, [[VAL]]
-define amdgpu_ps void @raw_buffer_load_v4i16(<4 x i32> inreg %rsrc, <4 x i16> addrspace(3)* %ptr) {
+define amdgpu_ps void @raw_buffer_load_v4i16(<4 x i32> inreg %rsrc, ptr addrspace(3) %ptr) {
main_body:
%val = call <4 x i16> @llvm.amdgcn.raw.buffer.load.v4i16(<4 x i32> %rsrc, i32 0, i32 0, i32 0)
- store <4 x i16> %val, <4 x i16> addrspace(3)* %ptr
+ store <4 x i16> %val, ptr addrspace(3) %ptr
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.f16.ll
index 0f1fa15f47cca0..f49c503cc77d68 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.f16.ll
@@ -8,11 +8,11 @@ declare half @llvm.amdgcn.rcp.f16(half %a)
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @rcp_f16(
- half addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.amdgcn.rcp.f16(half %a.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll
index 71db76d902b79d..f175540d29c043 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.legacy.ll
@@ -1,40 +1,40 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: not llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s

-; ERROR: error: :0:0: in function rcp_legacy_f32 void (float addrspace(1)*, float): intrinsic not supported on subtarget
+; ERROR: error: :0:0: in function rcp_legacy_f32 void (ptr addrspace(1), float): intrinsic not supported on subtarget

declare float @llvm.amdgcn.rcp.legacy(float) #0

; GCN-LABEL: {{^}}rcp_legacy_f32:
; GCN: v_rcp_legacy_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @rcp_legacy_f32(float addrspace(1)* %out, float %src) #1 {
+define amdgpu_kernel void @rcp_legacy_f32(ptr addrspace(1) %out, float %src) #1 {
%rcp = call float @llvm.amdgcn.rcp.legacy(float %src) #0
- store float %rcp, float addrspace(1)* %out, align 4
+ store float %rcp, ptr addrspace(1) %out, align 4
ret void
}

; TODO: Really these should be constant folded
; GCN-LABEL: {{^}}rcp_legacy_f32_constant_4.0
; GCN: v_rcp_legacy_f32_e32 {{v[0-9]+}}, 4.0
-define amdgpu_kernel void @rcp_legacy_f32_constant_4.0(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @rcp_legacy_f32_constant_4.0(ptr addrspace(1) %out) #1 {
%rcp = call float @llvm.amdgcn.rcp.legacy(float 4.0) #0
- store float %rcp, float addrspace(1)* %out, align 4
+ store float %rcp, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}rcp_legacy_f32_constant_100.0
; GCN: v_rcp_legacy_f32_e32 {{v[0-9]+}}, 0x42c80000
-define amdgpu_kernel void @rcp_legacy_f32_constant_100.0(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @rcp_legacy_f32_constant_100.0(ptr addrspace(1) %out) #1 {
%rcp = call float @llvm.amdgcn.rcp.legacy(float 100.0) #0
- store float %rcp, float addrspace(1)* %out, align 4
+ store float %rcp, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}rcp_legacy_undef_f32:
; GCN-NOT: v_rcp_legacy_f32
-define amdgpu_kernel void @rcp_legacy_undef_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @rcp_legacy_undef_f32(ptr addrspace(1) %out) #1 {
%rcp = call float @llvm.amdgcn.rcp.legacy(float undef)
- store float %rcp, float addrspace(1)* %out, align 4
+ store float %rcp, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
index 929f935f691088..74d34044b7d952 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.ll
@@ -10,27 +10,27 @@ declare float @llvm.sqrt.f32(float) #0
; SI: v_mov_b32_e32 [[NAN:v[0-9]+]], 0x7fc00000
; SI-NOT: [[NAN]]
; SI: buffer_store_dword [[NAN]]
-define amdgpu_kernel void @rcp_undef_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @rcp_undef_f32(ptr addrspace(1) %out) #1 {
%rcp = call float @llvm.amdgcn.rcp.f32(float undef)
- store float %rcp, float addrspace(1)* %out, align 4
+ store float %rcp, ptr addrspace(1) %out, align 4
ret void
}

; FUNC-LABEL: {{^}}rcp_2_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0.5
-define amdgpu_kernel void @rcp_2_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @rcp_2_f32(ptr addrspace(1) %out) #1 {
%rcp = call float @llvm.amdgcn.rcp.f32(float 2.0)
- store float %rcp, float addrspace(1)* %out, align 4
+ store float %rcp, ptr addrspace(1) %out, align 4
ret void
}

; FUNC-LABEL: {{^}}rcp_10_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0x3dcccccd
-define amdgpu_kernel void @rcp_10_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @rcp_10_f32(ptr addrspace(1) %out) #1 {
%rcp = call float @llvm.amdgcn.rcp.f32(float 10.0)
- store float %rcp, float addrspace(1)* %out, align 4
+ store float %rcp, ptr addrspace(1) %out, align 4
ret void
}

@@ -38,9 +38,9 @@ define amdgpu_kernel void @rcp_10_f32(float addrspace(1)* %out) #1 {
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(float addrspace(1)* %out, float %src) #1 {
+define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(ptr addrspace(1) %out, float %src) #1 {
%rcp = fdiv float 1.0, %src, !fpmath !0
- store float %rcp, float addrspace(1)* %out, align 4
+ store float %rcp, ptr addrspace(1) %out, align 4
ret void
}

@@ -48,35 +48,35 @@ define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(float addrspace(1)* %o
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @safe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #4 {
+define amdgpu_kernel void @safe_f32_denormals_rcp_pat_f32(ptr addrspace(1) %out, float %src) #4 {
%rcp = fdiv float 1.0, %src, !fpmath !0
- store float %rcp, float addrspace(1)* %out, align 4
+ store float %rcp, ptr addrspace(1) %out, align 4
ret void
}

; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32:
; SI: v_div_scale_f32
-define amdgpu_kernel void @unsafe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #3 {
+define amdgpu_kernel void @unsafe_f32_denormals_rcp_pat_f32(ptr addrspace(1) %out, float %src) #3 {
%rcp = fdiv float 1.0, %src
- store float %rcp, float addrspace(1)* %out, align 4
+ store float %rcp, ptr addrspace(1) %out, align 4
ret void
}

; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f32:
; SI: v_rsq_f32_e32
-define amdgpu_kernel void @safe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
+define amdgpu_kernel void @safe_rsq_rcp_pat_f32(ptr addrspace(1) %out, float %src) #1 {
%sqrt = call float @llvm.sqrt.f32(float %src)
%rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
- store float %rcp, float addrspace(1)* %out, align 4
+ store float %rcp, ptr addrspace(1) %out, align 4
ret void
}

; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32:
; SI: v_rsq_f32_e32
-define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #2 {
+define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(ptr addrspace(1) %out, float %src) #2 {
%sqrt = call float @llvm.sqrt.f32(float %src)
%rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
- store float %rcp, float addrspace(1)* %out, align 4
+ store float %rcp, ptr addrspace(1) %out, align 4
ret void
}

@@ -84,9 +84,9 @@ define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(float addrspace(1)* %out, floa
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
-define amdgpu_kernel void @rcp_f64(double addrspace(1)* %out, double %src) #1 {
+define amdgpu_kernel void @rcp_f64(ptr addrspace(1) %out, double %src) #1 {
%rcp = call double @llvm.amdgcn.rcp.f64(double %src)
- store double %rcp, double addrspace(1)* %out, align 8
+ store double %rcp, ptr addrspace(1) %out, align 8
ret void
}

@@ -94,17 +94,17 @@ define amdgpu_kernel void @rcp_f64(double addrspace(1)* %out, double %src) #1 {
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
-define amdgpu_kernel void @unsafe_rcp_f64(double addrspace(1)* %out, double %src) #2 {
+define amdgpu_kernel void @unsafe_rcp_f64(ptr addrspace(1) %out, double %src) #2 {
%rcp = call double @llvm.amdgcn.rcp.f64(double %src)
- store double %rcp, double addrspace(1)* %out, align 8
+ store double %rcp, ptr addrspace(1) %out, align 8
ret void
}

; FUNC-LABEL: {{^}}rcp_pat_f64:
; SI: v_div_scale_f64
-define amdgpu_kernel void @rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
+define amdgpu_kernel void @rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
%rcp = fdiv double 1.0, %src
- store double %rcp, double addrspace(1)* %out, align 8
+ store double %rcp, ptr addrspace(1) %out, align 8
ret void
}

@@ -116,9 +116,9 @@ define amdgpu_kernel void @rcp_pat_f64(double addrspace(1)* %out, double %src) #
; SI: v_fma_f64
; SI: v_fma_f64
; SI: v_fma_f64
-define amdgpu_kernel void @unsafe_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
+define amdgpu_kernel void @unsafe_rcp_pat_f64(ptr addrspace(1) %out, double %src) #2 {
%rcp = fdiv double 1.0, %src
- store double %rcp, double addrspace(1)* %out, align 8
+ store double %rcp, ptr addrspace(1) %out, align 8
ret void
}

@@ -126,10 +126,10 @@ define amdgpu_kernel void @unsafe_rcp_pat_f64(double addrspace(1)* %out, double
; SI-NOT: v_rsq_f64_e32
; SI: v_sqrt_f64
; SI: v_rcp_f64
-define amdgpu_kernel void @safe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
+define amdgpu_kernel void @safe_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #1 {
%sqrt = call double @llvm.sqrt.f64(double %src)
%rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
- store double %rcp, double addrspace(1)* %out, align 8
+ store double %rcp, ptr addrspace(1) %out, align 8
ret void
}

@@ -137,10 +137,10 @@ define amdgpu_kernel void @safe_rsq_rcp_pat_f64(double addrspace(1)* %out, doubl
; SI: v_sqrt_f64_e32 [[SQRT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SQRT]]
; SI: buffer_store_dwordx2 [[RESULT]]
-define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
+define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(ptr addrspace(1) %out, double %src) #2 {
%sqrt = call double @llvm.sqrt.f64(double %src)
%rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
- store double %rcp, double addrspace(1)* %out, align 8
+ store double %rcp, ptr addrspace(1) %out, align 8
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
index 991afdb6c016ee..e789db19a67380 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readfirstlane.ll
@@ -4,9 +4,9 @@ declare i32 @llvm.amdgcn.readfirstlane(i32) #0
; CHECK-LABEL: {{^}}test_readfirstlane:
; CHECK: v_readfirstlane_b32 s{{[0-9]+}}, v2
-define void @test_readfirstlane(i32 addrspace(1)* %out, i32 %src) #1 {
+define void @test_readfirstlane(ptr addrspace(1) %out, i32 %src) #1 {
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %src)
- store i32 %readfirstlane, i32 addrspace(1)* %out, align 4
+ store i32 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}

@@ -14,7 +14,7 @@ define void @test_readfirstlane(i32 addrspace(1)* %out, i32 %src) #1 {
; CHECK: s_mov_b32 [[SGPR_VAL:s[0-9]]], 32
; CHECK-NOT: [[SGPR_VAL]]
; CHECK: ; use [[SGPR_VAL]]
-define amdgpu_kernel void @test_readfirstlane_imm(i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm(ptr addrspace(1) %out) #1 {
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 32)
call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
ret void
@@ -24,9 +24,9 @@ define amdgpu_kernel void @test_readfirstlane_imm(i32 addrspace(1)* %out) #1 {
; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], 32
; CHECK-NOT: [[VVAL]]
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VVAL]]
-define amdgpu_kernel void @test_readfirstlane_imm_fold(i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_imm_fold(ptr addrspace(1) %out) #1 {
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 32)
- store i32 %readfirstlane, i32 addrspace(1)* %out, align 4
+ store i32 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}

@@ -34,10 +34,10 @@ define amdgpu_kernel void @test_readfirstlane_imm_fold(i32 addrspace(1)* %out) #
; CHECK: s_mov_b32 m0, -1
; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], m0
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VVAL]]
-define amdgpu_kernel void @test_readfirstlane_m0(i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) #1 {
%m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %m0)
- store i32 %readfirstlane, i32 addrspace(1)* %out, align 4
+ store i32 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}

@@ -49,19 +49,19 @@ define amdgpu_kernel void @test_readfirstlane_m0(i32 addrspace(1)* %out) #1 {
; CHECK-NOT: readfirstlane
; CHECK: v_mov_b32_e32 [[VCOPY:v[0-9]+]], [[SGPR]]
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VCOPY]]
-define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr(i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr(ptr addrspace(1) %out) #1 {
%sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"()
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %sgpr)
- store i32 %readfirstlane, i32 addrspace(1)* %out, align 4
+ store i32 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}

; Make sure this doesn't crash.
; CHECK-LABEL: {{^}}test_readfirstlane_fi:
; CHECK: s_mov_b32 [[FIVAL:s[0-9]]], 4
-define amdgpu_kernel void @test_readfirstlane_fi(i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) #1 {
%alloca = alloca i32, addrspace(5)
- %int = ptrtoint i32 addrspace(5)* %alloca to i32
+ %int = ptrtoint ptr addrspace(5) %alloca to i32
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %int)
call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
index 230d7aff230a4d..51465f6bd10ce5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.readlane.ll
@@ -21,23 +21,23 @@ define amdgpu_kernel void @test_readlane_vreg_sreg(i32 %src0, i32 %src1) #1 {
; CHECK-LABEL: {{^}}test_readlane_imm_sreg:
; CHECK-NOT: v_readlane_b32
-define amdgpu_kernel void @test_readlane_imm_sreg(i32 addrspace(1)* %out, i32 %src1) #1 {
+define amdgpu_kernel void @test_readlane_imm_sreg(ptr addrspace(1) %out, i32 %src1) #1 {
%readlane = call i32 @llvm.amdgcn.readlane(i32 32, i32 %src1)
- store i32 %readlane, i32 addrspace(1)* %out, align 4
+ store i32 %readlane, ptr addrspace(1) %out, align 4
ret void
}

; CHECK-LABEL: {{^}}test_readlane_vregs:
; CHECK: v_readfirstlane_b32 [[LANE:s[0-9]+]], v{{[0-9]+}}
; CHECK: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, [[LANE]]
-define amdgpu_kernel void @test_readlane_vregs(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #1 {
+define amdgpu_kernel void @test_readlane_vregs(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep.in = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid
- %args = load <2 x i32>, <2 x i32> addrspace(1)* %gep.in
+ %gep.in = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 %tid
+ %args = load <2 x i32>, ptr addrspace(1) %gep.in
%value = extractelement <2 x i32> %args, i32 0
%lane = extractelement <2 x i32> %args, i32 1
%readlane = call i32 @llvm.amdgcn.readlane(i32 %value, i32 %lane)
- store i32 %readlane, i32 addrspace(1)* %out, align 4
+ store i32 %readlane, ptr addrspace(1) %out, align 4
ret void
}

@@ -46,19 +46,19 @@ define amdgpu_kernel void @test_readlane_vregs(i32 addrspace(1)* %out, <2 x i32>
; CHECK: s_mov_b32 m0, -1
; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], m0
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VVAL]]
-define amdgpu_kernel void @test_readlane_m0_sreg(i32 addrspace(1)* %out, i32 %src1) #1 {
+define amdgpu_kernel void @test_readlane_m0_sreg(ptr addrspace(1) %out, i32 %src1) #1 {
%m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
%readlane = call i32 @llvm.amdgcn.readlane(i32 %m0, i32 %src1)
- store i32 %readlane, i32 addrspace(1)* %out, align 4
+ store i32 %readlane, ptr addrspace(1) %out, align 4
ret void
}

; CHECK-LABEL: {{^}}test_readlane_vgpr_imm:
; CHECK: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 32
-define amdgpu_kernel void @test_readlane_vgpr_imm(i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_readlane_vgpr_imm(ptr addrspace(1) %out) #1 {
%vgpr = call i32 asm sideeffect "; def $0", "=v"()
%readlane = call i32 @llvm.amdgcn.readlane(i32 %vgpr, i32 32) #0
- store i32 %readlane, i32 addrspace(1)* %out, align 4
+ store i32 %readlane, ptr addrspace(1) %out, align 4
ret void
}

@@ -70,10 +70,10 @@ define amdgpu_kernel void @test_readlane_vgpr_imm(i32 addrspace(1)* %out) #1 {
; CHECK-NOT: readlane
; CHECK: v_mov_b32_e32 [[VCOPY:v[0-9]+]], [[SGPR]]
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VCOPY]]
-define amdgpu_kernel void @test_readlane_copy_from_sgpr(i32 addrspace(1)* %out) #1 {
+define amdgpu_kernel void @test_readlane_copy_from_sgpr(ptr addrspace(1) %out) #1 {
%sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"()
%readfirstlane = call i32 @llvm.amdgcn.readlane(i32 %sgpr, i32 7)
- store i32 %readfirstlane, i32 addrspace(1)* %out, align 4
+ store i32 %readfirstlane, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
index 53c8a8c7a06000..9b62d6c2fdec9c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.clamp.ll
@@ -12,9 +12,9 @@ declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
; VI-DAG: v_min_f32_e32 [[MIN:v[0-9]+]], 0x7f7fffff, [[RSQ]]
; VI: v_max_f32_e32 [[RESULT:v[0-9]+]], 0xff7fffff, [[MIN]]
; VI: buffer_store_dword [[RESULT]]
-define amdgpu_kernel void @rsq_clamp_f32(float addrspace(1)* %out, float %src) #0 {
+define amdgpu_kernel void @rsq_clamp_f32(ptr addrspace(1) %out, float %src) #0 {
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
- store float %rsq_clamp, float addrspace(1)* %out
+ store float %rsq_clamp, ptr addrspace(1) %out
ret void
}

@@ -30,17 +30,17 @@ define amdgpu_kernel void @rsq_clamp_f32(float addrspace(1)* %out, float %src) #
; VI-DAG: v_rsq_f64_e32 [[RSQ:v\[[0-9]+:[0-9]+\]]], s[{{[0-9]+:[0-9]+}}
; VI-DAG: v_min_f64 v[0:1], [[RSQ]], s[[[LOW1]]:[[HIGH1]]]
; VI-DAG: v_max_f64 v[0:1], v[0:1], s[[[LOW1]]:[[HIGH2]]]
-define amdgpu_kernel void @rsq_clamp_f64(double addrspace(1)* %out, double %src) #0 {
+define amdgpu_kernel void @rsq_clamp_f64(ptr addrspace(1) %out, double %src) #0 {
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
- store double %rsq_clamp, double addrspace(1)* %out
+ store double %rsq_clamp, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}rsq_clamp_undef_f32:
; SI-NOT: v_rsq_clamp_f32
-define amdgpu_kernel void @rsq_clamp_undef_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @rsq_clamp_undef_f32(ptr addrspace(1) %out) #0 {
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef)
- store float %rsq_clamp, float addrspace(1)* %out
+ store float %rsq_clamp, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.f16.ll
index fd48021408100c..4c344469015d9f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.f16.ll
@@ -8,11 +8,11 @@ declare half @llvm.amdgcn.rsq.f16(half %a)
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define amdgpu_kernel void @rsq_f16(
- half addrspace(1)* %r,
- half addrspace(1)* %a) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
entry:
- %a.val = load half, half addrspace(1)* %a
+ %a.val = load half, ptr addrspace(1) %a
%r.val = call half @llvm.amdgcn.rsq.f16(half %a.val)
- store half %r.val, half addrspace(1)* %r
+ store half %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll
index 7f4c2cb19a3245..d987746a6a0de3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.legacy.ll
@@ -4,34 +4,34 @@ declare float @llvm.amdgcn.rsq.legacy(float) #0
; FUNC-LABEL: {{^}}rsq_legacy_f32:
; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @rsq_legacy_f32(float addrspace(1)* %out, float %src) #1 {
+define amdgpu_kernel void @rsq_legacy_f32(ptr addrspace(1) %out, float %src) #1 {
%rsq = call float @llvm.amdgcn.rsq.legacy(float %src) #0
- store float %rsq, float addrspace(1)* %out, align 4
+ store float %rsq, ptr addrspace(1) %out, align 4
ret void
}

; TODO: Really these should be constant folded
; FUNC-LABEL: {{^}}rsq_legacy_f32_constant_4.0
; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, 4.0
-define amdgpu_kernel void @rsq_legacy_f32_constant_4.0(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @rsq_legacy_f32_constant_4.0(ptr addrspace(1) %out) #1 {
%rsq = call float @llvm.amdgcn.rsq.legacy(float 4.0) #0
- store float %rsq, float addrspace(1)* %out, align 4
+ store float %rsq, ptr addrspace(1) %out, align 4
ret void
}

; FUNC-LABEL: {{^}}rsq_legacy_f32_constant_100.0
; SI: v_rsq_legacy_f32_e32 {{v[0-9]+}}, 0x42c80000
-define amdgpu_kernel void @rsq_legacy_f32_constant_100.0(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @rsq_legacy_f32_constant_100.0(ptr addrspace(1) %out) #1 {
%rsq = call float @llvm.amdgcn.rsq.legacy(float 100.0) #0
- store float %rsq, float addrspace(1)* %out, align 4
+ store float %rsq, ptr addrspace(1) %out, align 4
ret void
}

; FUNC-LABEL: {{^}}rsq_legacy_undef_f32:
; SI-NOT: v_rsq_legacy_f32
-define amdgpu_kernel void @rsq_legacy_undef_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @rsq_legacy_undef_f32(ptr addrspace(1) %out) #1 {
%rsq = call float @llvm.amdgcn.rsq.legacy(float undef)
- store float %rsq, float addrspace(1)* %out, align 4
+ store float %rsq, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll
index 0ce26d0fe8762d..2be94f4dbc6501 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.ll
@@ -6,43 +6,43 @@ declare double @llvm.amdgcn.rsq.f64(double) #0
; FUNC-LABEL: {{^}}rsq_f32:
; SI: v_rsq_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
-define amdgpu_kernel void @rsq_f32(float addrspace(1)* %out, float %src) #1 {
+define amdgpu_kernel void @rsq_f32(ptr addrspace(1) %out, float %src) #1 {
%rsq = call float @llvm.amdgcn.rsq.f32(float %src) #0
- store float %rsq, float addrspace(1)* %out, align 4
+ store float %rsq, ptr addrspace(1) %out, align 4
ret void
}

; TODO: Really these should be constant folded
; FUNC-LABEL: {{^}}rsq_f32_constant_4.0
; SI: v_rsq_f32_e32 {{v[0-9]+}}, 4.0
-define amdgpu_kernel void @rsq_f32_constant_4.0(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @rsq_f32_constant_4.0(ptr addrspace(1) %out) #1 {
%rsq = call float @llvm.amdgcn.rsq.f32(float 4.0) #0
- store float %rsq, float addrspace(1)* %out, align 4
+ store float %rsq, ptr addrspace(1) %out, align 4
ret void
}

; FUNC-LABEL: {{^}}rsq_f32_constant_100.0
; SI: v_rsq_f32_e32 {{v[0-9]+}}, 0x42c80000
-define amdgpu_kernel void @rsq_f32_constant_100.0(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @rsq_f32_constant_100.0(ptr addrspace(1) %out) #1 {
%rsq = call float @llvm.amdgcn.rsq.f32(float 100.0) #0
- store float %rsq, float addrspace(1)* %out, align 4
+ store float %rsq, ptr addrspace(1) %out, align 4
ret void
}

; FUNC-LABEL: {{^}}rsq_f64:
; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @rsq_f64(double addrspace(1)* %out, double %src) #1 {
+define amdgpu_kernel void @rsq_f64(ptr addrspace(1) %out, double %src) #1 {
%rsq = call double @llvm.amdgcn.rsq.f64(double %src) #0
- store double %rsq, double addrspace(1)* %out, align 4
+ store double %rsq, ptr addrspace(1) %out, align 4
ret void
}

; TODO: Really these should be constant folded
; FUNC-LABEL: {{^}}rsq_f64_constant_4.0
; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, 4.0
-define amdgpu_kernel void @rsq_f64_constant_4.0(double addrspace(1)* %out) #1 {
+define amdgpu_kernel void @rsq_f64_constant_4.0(ptr addrspace(1) %out) #1 {
%rsq = call double @llvm.amdgcn.rsq.f64(double 4.0) #0
- store double %rsq, double addrspace(1)* %out, align 4
+ store double %rsq, ptr addrspace(1) %out, align 4
ret void
}

@@ -50,17 +50,17 @@ define amdgpu_kernel void @rsq_f64_constant_4.0(double addrspace(1)* %out) #1 {
; SI-DAG: s_mov_b32 s{{[0-9]+}}, 0x40590000
; SI-DAG: s_mov_b32 s{{[0-9]+}}, 0{{$}}
; SI: v_rsq_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
-define amdgpu_kernel void @rsq_f64_constant_100.0(double addrspace(1)* %out) #1 {
+define amdgpu_kernel void @rsq_f64_constant_100.0(ptr addrspace(1) %out) #1 {
%rsq = call double @llvm.amdgcn.rsq.f64(double 100.0) #0
- store double %rsq, double addrspace(1)* %out, align 4
+ store double %rsq, ptr addrspace(1) %out, align 4
ret void
}

; FUNC-LABEL: {{^}}rsq_undef_f32:
; SI-NOT: v_rsq_f32
-define amdgpu_kernel void @rsq_undef_f32(float addrspace(1)* %out) #1 {
+define amdgpu_kernel void @rsq_undef_f32(ptr addrspace(1) %out) #1 {
%rsq = call float @llvm.amdgcn.rsq.f32(float undef)
- store float %rsq, float addrspace(1)* %out, align 4
+ store float %rsq, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index 4a3c1a5a12b833..48c4e0276edda3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -4,7 +4,7 @@
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=VARIANT2 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck --check-prefix=VARIANT3 %s

-define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
+define amdgpu_kernel void @test_barrier(ptr addrspace(1) %out, i32 %size) #0 {
; VARIANT0-LABEL: test_barrier:
; VARIANT0: ; %bb.0: ; %entry
; VARIANT0-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
@@ -87,14 +87,14 @@ define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
; VARIANT3-NEXT: s_endpgm
entry:
%tmp = call i32 @llvm.amdgcn.workitem.id.x()
- %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp
- store i32 %tmp, i32 addrspace(1)* %tmp1
+ %tmp1 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp
+ store i32 %tmp, ptr addrspace(1) %tmp1
call void @llvm.amdgcn.s.barrier()
%tmp3 = sub i32 %size, 1
%tmp4 = sub i32 %tmp3, %tmp
- %tmp5 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp4
- %tmp6 = load i32, i32 addrspace(1)* %tmp5
- store i32 %tmp6, i32 addrspace(1)* %tmp1
+ %tmp5 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp4
+ %tmp6 = load i32, ptr addrspace(1) %tmp5
+ store i32 %tmp6, ptr addrspace(1) %tmp1
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
index 49bb769ac6a061..1042290ce83625 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.buffer.load.ll
@@ -214,7 +214,7 @@ main_body:
define amdgpu_ps void @s_buffer_load_index_across_bb(<4 x i32> inreg %desc, i32 %index) {
main_body:
%tmp = shl i32 %index, 4
- store i32 %tmp, i32 addrspace(1)* @gv
+ store i32 %tmp, ptr addrspace(1) @gv
br label %bb1

bb1: ; preds = %main_body
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll
index b7fb96a2d1a598..d899aeb57065a1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll
@@ -24,7 +24,7 @@ define amdgpu_kernel void @test_s_dcache_inv_insert_wait() #0 {
br label %end

end:
- store volatile i32 3, i32 addrspace(1)* undef
+ store volatile i32 3, ptr addrspace(1) undef
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll
index 41f398f685f941..fc8d61f4b97467 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll
@@ -24,7 +24,7 @@ define amdgpu_kernel void @test_s_dcache_inv_vol_insert_wait() #0 {
br label %end

end:
- store volatile i32 3, i32 addrspace(1)* undef
+ store volatile i32 3, ptr addrspace(1) undef
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll
index 9026d4831ad794..43f2f61dfa0ca0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll
@@ -22,7 +22,7 @@ define amdgpu_kernel void @test_s_dcache_wb_insert_wait() #0 {
br label %end

end:
- store volatile i32 3, i32 addrspace(1)* undef
+ store volatile i32 3, ptr addrspace(1) undef
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll
index 7f6e115e0e6956..4ba28cf3fcd62f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll
@@ -22,7 +22,7 @@ define amdgpu_kernel void @test_s_dcache_wb_vol_insert_wait() #0 {
br label %end

end:
- store volatile i32 3, i32 addrspace(1)* undef
+ store volatile i32 3, ptr addrspace(1) undef
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll
index 111fd35e1ce41c..46953d32dd4351 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.get.waveid.in.workgroup.ll
@@ -8,11 +8,11 @@ declare i32 @llvm.amdgcn.s.get.waveid.in.workgroup() #0
; GFX10: s_waitcnt lgkmcnt(0)
; GFX10: v_mov_b32_e32 [[VDEST:v[0-9]+]], [[DEST]]
; GFX10: global_store_dword v{{[0-9]+}}, [[VDEST]], s{{\[[0-9]+:[0-9]+\]$}}
-define amdgpu_kernel void @test_s_get_waveid_in_workgroup(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @test_s_get_waveid_in_workgroup(ptr addrspace(1) %out) {
; Make sure %out is loaded and assiciated wait count already inserted
- store i32 0, i32 addrspace(1)* %out
+ store i32 0, ptr addrspace(1) %out
%v = call i32 @llvm.amdgcn.s.get.waveid.in.workgroup()
- store i32 %v, i32 addrspace(1)* %out
+ store i32 %v, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll
index 22e15e21680516..962b6c841f93ac 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getpc.ll
@@ -6,9 +6,9 @@ declare i64 @llvm.amdgcn.s.getpc() #0
; GCN: s_load_dwordx2
; GCN-DAG: s_getpc_b64 s{{\[[0-9]+:[0-9]+\]}}
; GCN: buffer_store_dwordx2
-define amdgpu_kernel void @test_s_getpc(i64 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_s_getpc(ptr addrspace(1) %out) #0 {
%tmp = call i64 @llvm.amdgcn.s.getpc() #1
- store volatile i64 %tmp, i64 addrspace(1)* %out, align 8
+ store volatile i64 %tmp, ptr addrspace(1) %out, align 8
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll
index 69cbf62c538d2e..def43948cbc900 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.getreg.ll
@@ -9,20 +9,20 @@
; GCN-LABEL: {{^}}s_getreg_test:
; GCN: s_getreg_b32 s{{[0-9]+}}, hwreg(HW_REG_LDS_ALLOC, 8, 23)
-define amdgpu_kernel void @s_getreg_test(i32 addrspace(1)* %out) { ; simm16=45574 for lds size.
+define amdgpu_kernel void @s_getreg_test(ptr addrspace(1) %out) { ; simm16=45574 for lds size.
%lds_size_64dwords = call i32 @llvm.amdgcn.s.getreg(i32 45574)
%lds_size_bytes = shl i32 %lds_size_64dwords, 8
- store i32 %lds_size_bytes, i32 addrspace(1)* %out
+ store i32 %lds_size_bytes, ptr addrspace(1) %out
ret void
}

; Call site has additional readnone knowledge.
; GCN-LABEL: {{^}}readnone_s_getreg_test:
; GCN: s_getreg_b32 s{{[0-9]+}}, hwreg(HW_REG_LDS_ALLOC, 8, 23)
-define amdgpu_kernel void @readnone_s_getreg_test(i32 addrspace(1)* %out) { ; simm16=45574 for lds size.
+define amdgpu_kernel void @readnone_s_getreg_test(ptr addrspace(1) %out) { ; simm16=45574 for lds size.
%lds_size_64dwords = call i32 @llvm.amdgcn.s.getreg(i32 45574) #1
%lds_size_bytes = shl i32 %lds_size_64dwords, 8
- store i32 %lds_size_bytes, i32 addrspace(1)* %out
+ store i32 %lds_size_bytes, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll
index 43d95dcbca0a72..e4bfb0821a5f73 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memrealtime.ll
@@ -13,12 +13,12 @@ declare i64 @llvm.amdgcn.s.memrealtime() #0
; GCN-NOT: lgkmcnt
; GCN: s_memrealtime s{{\[[0-9]+:[0-9]+\]}}
; GCN: _store_dwordx2
-define amdgpu_kernel void @test_s_memrealtime(i64 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_s_memrealtime(ptr addrspace(1) %out) #0 {
%cycle0 = call i64 @llvm.amdgcn.s.memrealtime()
- store volatile i64 %cycle0, i64 addrspace(1)* %out
+ store volatile i64 %cycle0, ptr addrspace(1) %out

%cycle1 = call i64 @llvm.amdgcn.s.memrealtime()
- store volatile i64 %cycle1, i64 addrspace(1)* %out
+ store volatile i64 %cycle1, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
index cd2eada5b82eb7..df8fb22cc8cf23 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.memtime.ll
@@ -13,12 +13,12 @@ declare i64 @llvm.amdgcn.s.memtime() #0
; SIVI-NOT: lgkmcnt
; GCN: s_memtime s{{\[[0-9]+:[0-9]+\]}}
; GCN: {{buffer|global}}_store_dwordx2
-define amdgpu_kernel void @test_s_memtime(i64 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_s_memtime(ptr addrspace(1) %out) #0 {
%cycle0 = call i64 @llvm.amdgcn.s.memtime()
- store volatile i64 %cycle0, i64 addrspace(1)* %out
+ store volatile i64 %cycle0, ptr addrspace(1) %out

%cycle1 = call i64 @llvm.amdgcn.s.memtime()
- store volatile i64 %cycle1, i64 addrspace(1)* %out
+ store volatile i64 %cycle1, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll
index 560d320dbc5606..8471626978ca4a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.hi.u8.ll
@@ -5,17 +5,17 @@ declare i32 @llvm.amdgcn.sad.hi.u8(i32, i32, i32) #0
; GCN-LABEL: {{^}}v_sad_hi_u8:
; GCN: v_sad_hi_u8 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @v_sad_hi_u8(i32 addrspace(1)* %out, i32 %src) {
+define amdgpu_kernel void @v_sad_hi_u8(ptr addrspace(1) %out, i32 %src) {
%result= call i32 @llvm.amdgcn.sad.hi.u8(i32 %src, i32 100, i32 100) #0
- store i32 %result, i32 addrspace(1)* %out, align 4
+ store i32 %result, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}v_sad_hi_u8_non_immediate:
; GCN: v_sad_hi_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @v_sad_hi_u8_non_immediate(i32 addrspace(1)* %out, i32 %src, i32 %a, i32 %b) {
+define amdgpu_kernel void @v_sad_hi_u8_non_immediate(ptr addrspace(1) %out, i32 %src, i32 %a, i32 %b) {
%result= call i32 @llvm.amdgcn.sad.hi.u8(i32 %src, i32 %a, i32 %b) #0
- store i32 %result, i32 addrspace(1)* %out, align 4
+ store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll
index b159b84da9f134..b2876f67c7a6e4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u16.ll
@@ -5,17 +5,17 @@ declare i32 @llvm.amdgcn.sad.u16(i32, i32, i32) #0
; GCN-LABEL: {{^}}v_sad_u16:
; GCN: v_sad_u16 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @v_sad_u16(i32 addrspace(1)* %out, i32 %src) {
+define amdgpu_kernel void @v_sad_u16(ptr addrspace(1) %out, i32 %src) {
%result= call i32 @llvm.amdgcn.sad.u16(i32 %src, i32 100, i32 100) #0
- store i32 %result, i32 addrspace(1)* %out, align 4
+ store i32 %result, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}v_sad_u16_non_immediate:
; GCN: v_sad_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @v_sad_u16_non_immediate(i32 addrspace(1)* %out, i32 %src, i32 %a, i32 %b) {
+define amdgpu_kernel void @v_sad_u16_non_immediate(ptr addrspace(1) %out, i32 %src, i32 %a, i32 %b) {
%result= call i32 @llvm.amdgcn.sad.u16(i32 %src, i32 %a, i32 %b) #0
- store i32 %result, i32 addrspace(1)* %out, align 4
+ store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll
index cde7e7ca44ae21..8c0ca22c03d8ef 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sad.u8.ll
@@ -5,17 +5,17 @@ declare i32 @llvm.amdgcn.sad.u8(i32, i32, i32) #0
; GCN-LABEL: {{^}}v_sad_u8:
; GCN: v_sad_u8 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
-define amdgpu_kernel void @v_sad_u8(i32 addrspace(1)* %out, i32 %src) {
+define amdgpu_kernel void @v_sad_u8(ptr addrspace(1) %out, i32 %src) {
%result= call i32 @llvm.amdgcn.sad.u8(i32 %src, i32 100, i32 100) #0
- store i32 %result, i32 addrspace(1)* %out, align 4
+ store i32 %result, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}v_sad_u8_non_immediate:
; GCN: v_sad_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
-define amdgpu_kernel void @v_sad_u8_non_immediate(i32 addrspace(1)* %out, i32 %src, i32 %a, i32 %b) {
+define amdgpu_kernel void @v_sad_u8_non_immediate(ptr addrspace(1) %out, i32 %src, i32 %a, i32 %b) {
%result= call i32 @llvm.amdgcn.sad.u8(i32 %src, i32 %a, i32 %b) #0
- store i32 %result, i32 addrspace(1)* %out, align 4
+ store i32 %result, ptr addrspace(1) %out, align 4
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll
index 8281a21462a4a3..d15a60e3ecb7f0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll
@@ -3,60 +3,60 @@
; GCN-LABEL: {{^}}bfe_i32_arg_arg_arg:
; GCN: v_bfe_i32
-define amdgpu_kernel void @bfe_i32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_i32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 %src1)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_arg_imm:
; GCN: v_bfe_i32
-define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_i32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 123)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_imm_arg:
; GCN: v_bfe_i32
-define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_i32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 123, i32 %src2)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}bfe_i32_imm_arg_arg:
; GCN: v_bfe_i32
-define amdgpu_kernel void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_i32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 123, i32 %src1, i32 %src2)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}v_bfe_print_arg:
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 2, 8
-define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) #0 {
- %load = load i32, i32 addrspace(1)* %src0, align 4
+define amdgpu_kernel void @v_bfe_print_arg(ptr addrspace(1) %out, ptr addrspace(1) %src0) #0 {
+ %load = load i32, ptr addrspace(1) %src0, align 4
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 2, i32 8)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_0_width_reg_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
%bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 %src1, i32 0)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}bfe_i32_arg_0_width_imm_offset:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
%bfe_u32 = call i32 @llvm.amdgcn.sbfe.i32(i32 %src0, i32 8, i32 0)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
ret void
}

@@ -64,11 +64,11 @@ define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out
; GCN: v_lshlrev_b32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %x = load i32, i32 addrspace(1)* %in, align 4
+define amdgpu_kernel void @bfe_i32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+ %x = load i32, ptr addrspace(1) %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 1, i32 31)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
ret void
}

@@ -78,11 +78,11 @@ define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %x = load i32, i32 addrspace(1)* %in, align 4
+define amdgpu_kernel void @bfe_i32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+ %x = load i32, ptr addrspace(1) %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 0, i32 31)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
ret void
}

@@ -90,11 +90,11 @@ define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(
; GCN: buffer_load_dword
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %x = load i32, i32 addrspace(1)* %in, align 4
+define amdgpu_kernel void @bfe_i32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+ %x = load i32, ptr addrspace(1) %in, align 4
%shl = shl i32 %x, 31
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
ret void
}

@@ -103,10 +103,10 @@ define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %x = load i32, i32 addrspace(1)* %in, align 4
+define amdgpu_kernel void @bfe_i32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+ %x = load i32, ptr addrspace(1) %in, align 4
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 31, i32 1)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
ret void
}

@@ -115,10 +115,10 @@ define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %x = load i32, i32 addrspace(1)* %in, align 4
+define amdgpu_kernel void @bfe_i32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+ %x = load i32, ptr addrspace(1) %in, align 4
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 1, i32 31)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
ret void
}

@@ -127,10 +127,10 @@ define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %x = load i32, i32 addrspace(1)* %in, align 4
+define amdgpu_kernel void @bfe_i32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+ %x = load i32, ptr addrspace(1) %in, align 4
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 8, i32 24)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
ret void
}

@@ -139,10 +139,10 @@ define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace
; GCN: v_ashrrev_i32_e32 v{{[0-9]+}}, 24, v{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %x = load i32, i32 addrspace(1)* %in, align 4
+define amdgpu_kernel void @bfe_i32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+ %x = load i32, ptr addrspace(1) %in, align 4
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 24, i32 8)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
ret void
}

@@ -150,22 +150,22 @@ define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace
; GCN: v_ashrrev_i32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %x = load i32, i32 addrspace(1)* %in, align 4
+define amdgpu_kernel void @bfe_i32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+ %x = load i32, ptr addrspace(1) %in, align 4
%shl = ashr i32 %x, 31
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
- store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
+ store i32 %bfe, ptr addrspace(1) %out, align 4 ret void
}

; GCN-LABEL: {{^}}bfe_i32_test_14:
; GCN-NOT: lshr
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %x = load i32, i32 addrspace(1)* %in, align 4
+define amdgpu_kernel void @bfe_i32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+ %x = load i32, ptr addrspace(1) %in, align 4
%shl = lshr i32 %x, 31
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shl, i32 31, i32 1)
- store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
+ store i32 %bfe, ptr addrspace(1) %out, align 4 ret void
}

; GCN-LABEL: {{^}}bfe_i32_constant_fold_test_0:
@@ -173,9 +173,9 @@ define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_0(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 0)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -184,9 +184,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_1(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 12334, i32 0, i32 0)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -195,9 +195,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_2(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 0, i32 0, i32 1)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -206,9 +206,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_3(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 1, i32 0, i32 1)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -217,9 +217,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_4(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 0, i32 1)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -228,9 +228,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_5(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 7, i32 1)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -239,9 +239,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0xffffff80
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_6(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 128, i32 0, i32 8)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -250,9 +250,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_7(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 0, i32 8)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -261,9 +261,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_8(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 127, i32 6, i32 8)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -272,9 +272,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_9(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65536, i32 16, i32 8)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -283,9 +283,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_10(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 65535, i32 16, i32 16)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -294,9 +294,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -6
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_11(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 4)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -305,9 +305,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_12(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 31, i32 1)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -316,9 +316,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_13(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 131070, i32 16, i32 16)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -327,9 +327,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 40
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_14(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 2, i32 30)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -338,9 +338,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 10
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_15(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 160, i32 4, i32 28)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -349,9 +349,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], -1
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_16(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 4294967295, i32 1, i32 7)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -360,9 +360,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0x7f
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_17(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 1, i32 31)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -371,9 +371,9 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out)
; GCN: v_mov_b32_e32 [[VREG:v[0-9]+]], 0
; GCN: buffer_store_dword [[VREG]],
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_i32_constant_fold_test_18(ptr addrspace(1) %out) #0 {
%bfe_i32 = call i32 @llvm.amdgcn.sbfe.i32(i32 255, i32 31, i32 1)
- store i32 %bfe_i32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_i32, ptr addrspace(1) %out, align 4
ret void
}

@@ -383,12 +383,12 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out)
; GCN-NOT: v_ashr
; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 0, 24
; GCN: buffer_store_dword [[BFE]],
-define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %x = load i32, i32 addrspace(1)* %in, align 4
+define amdgpu_kernel void @bfe_sext_in_reg_i24(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+ %x = load i32, ptr addrspace(1) %in, align 4
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %x, i32 0, i32 24)
%shl = shl i32 %bfe, 8
%ashr = ashr i32 %shl, 8
- store i32 %ashr, i32 addrspace(1)* %out, align 4
+ store i32 %ashr, ptr addrspace(1) %out, align 4
ret void
}

@@ -399,21 +399,21 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrs
; GCN: v_add_{{[iu]}}32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
; GCN: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; GCN: buffer_store_dword [[TMP2]]
-define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
- %src = load i32, i32 addrspace(1)* %in, align 4
+define amdgpu_kernel void @simplify_demanded_bfe_sdiv(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+ %src = load i32, ptr addrspace(1) %in, align 4
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16)
%div = sdiv i32 %bfe, 2
- store i32 %div, i32 addrspace(1)* %out, align 4
+ store i32 %div, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}bfe_0_width:
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
- %load = load i32, i32 addrspace(1)* %ptr, align 4
+define amdgpu_kernel void @bfe_0_width(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+ %load = load i32, ptr addrspace(1) %ptr, align 4
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 8, i32 0)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
ret void
}

@@ -421,22 +421,22 @@ define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)*
; GCN: v_bfe_i32
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
- %load = load i32, i32 addrspace(1)* %ptr, align 4
+define amdgpu_kernel void @bfe_8_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+ %load = load i32, ptr addrspace(1) %ptr, align 4
%bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
%bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
- store i32 %bfe1, i32 addrspace(1)* %out, align 4
+ store i32 %bfe1, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}bfe_8_bfe_16:
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
- %load = load i32, i32 addrspace(1)* %ptr, align 4
+define amdgpu_kernel void @bfe_8_bfe_16(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+ %load = load i32, ptr addrspace(1) %ptr, align 4
%bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 8)
%bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 16)
- store i32 %bfe1, i32 addrspace(1)* %out, align 4
+ store i32 %bfe1, ptr addrspace(1) %out, align 4
ret void
}

@@ -445,11 +445,11 @@ define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)
; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
- %load = load i32, i32 addrspace(1)* %ptr, align 4
+define amdgpu_kernel void @bfe_16_bfe_8(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+ %load = load i32, ptr addrspace(1) %ptr, align 4
%bfe0 = call i32 @llvm.amdgcn.sbfe.i32(i32 %load, i32 0, i32 16)
%bfe1 = call i32 @llvm.amdgcn.sbfe.i32(i32 %bfe0, i32 0, i32 8)
- store i32 %bfe1, i32 addrspace(1)* %out, align 4
+ store i32 %bfe1, ptr addrspace(1) %out, align 4
ret void
}

@@ -458,22 +458,22 @@ define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)
; GCN: s_sext_i32_i8 s{{[0-9]+}}, s{{[0-9]+}}
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
%c = add i32 %a, %b ; add to prevent folding into extload
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 0, i32 8)
%shl = shl i32 %bfe, 24
%ashr = ashr i32 %shl, 24
- store i32 %ashr, i32 addrspace(1)* %out, align 4
+ store i32 %ashr, ptr addrspace(1) %out, align 4
ret void
}

; GCN-LABEL: {{^}}sext_in_reg_i8_to_i32_bfe_wrong:
-define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
%c = add i32 %a, %b ; add to prevent folding into extload
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %c, i32 8, i32 0)
%shl = shl i32 %bfe, 24
%ashr = ashr i32 %shl, 24
- store i32 %ashr, i32 addrspace(1)* %out, align 4
+ store i32 %ashr, ptr addrspace(1) %out, align 4
ret void
}

@@ -481,13 +481,13 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %ou
; GCN: buffer_load_sbyte
; GCN-NOT: {{[^@]}}bfe
; GCN: s_endpgm
-define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
- %load = load i8, i8 addrspace(1)* %ptr, align 1
+define amdgpu_kernel void @sextload_i8_to_i32_bfe(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 {
+ %load = load i8, ptr addrspace(1) %ptr, align 1
%sext = sext i8 %load to i32
%bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 0, i32 8)
%shl = shl i32 %bfe, 24
%ashr = ashr i32 %shl, 24
- store i32 %ashr, i32 addrspace(1)* %out, align 4
+ store i32 %ashr, ptr addrspace(1) %out, align 4
ret void
}

@@ -495,13 +495,13 @@ define amdgpu_kernel void
@sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 add ; GCN-LABEL: {{^}}sextload_i8_to_i32_bfe_0:{{.*$}} ; GCN-NOT: {{[^@]}}bfe ; GCN: s_endpgm -define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 { - %load = load i8, i8 addrspace(1)* %ptr, align 1 +define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(ptr addrspace(1) %out, ptr addrspace(1) %ptr) #0 { + %load = load i8, ptr addrspace(1) %ptr, align 1 %sext = sext i8 %load to i32 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %sext, i32 8, i32 0) %shl = shl i32 %bfe, 24 %ashr = ashr i32 %shl, 24 - store i32 %ashr, i32 addrspace(1)* %out, align 4 + store i32 %ashr, ptr addrspace(1) %out, align 4 ret void } @@ -510,12 +510,12 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 a ; GCN-NOT: shl ; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 1 ; GCN: s_endpgm -define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %x = load i32, i32 addrspace(1)* %in, align 4 +define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 31 %shr = ashr i32 %shl, 31 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 0, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } @@ -525,12 +525,12 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i ; GCN-NOT: shr ; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 1 ; GCN: s_endpgm -define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %x = load i32, i32 addrspace(1)* %in, align 4 +define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 30 %shr = ashr i32 %shl, 30 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 1) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } @@ -541,12 +541,12 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i ; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 2 ; GCN: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2 ; GCN: s_endpgm -define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %x = load i32, i32 addrspace(1)* %in, align 4 +define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %x = load i32, ptr addrspace(1) %in, align 4 %shl = shl i32 %x, 30 %shr = ashr i32 %shl, 30 %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %shr, i32 1, i32 2) - store i32 %bfe, i32 addrspace(1)* %out, align 4 + store i32 %bfe, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll index 00d1cbbd58c590..035903b9b068ef 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sched.group.barrier.ll @@ -26,7 +26,7 @@ entry: ret void } -define amdgpu_kernel void @test_sched_group_barrier_pipeline_READ_VALU_WRITE(<32 x i32> addrspace(1)* noalias %in, <32 x i32> addrspace(1)* noalias %out) #0 { +define amdgpu_kernel void @test_sched_group_barrier_pipeline_READ_VALU_WRITE(ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %out) #0 { ; GCN-LABEL: 
test_sched_group_barrier_pipeline_READ_VALU_WRITE: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -161,11 +161,11 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_READ_VALU_WRITE(<32 ; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000040) size(8) SyncID(0) ; EXACTCUTOFF-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #2 - %gep1 = getelementptr <32 x i32>, <32 x i32> addrspace(1)* %in, i32 %tid - %load = load <32 x i32>, <32 x i32> addrspace(1)* %gep1 + %gep1 = getelementptr <32 x i32>, ptr addrspace(1) %in, i32 %tid + %load = load <32 x i32>, ptr addrspace(1) %gep1 %mul = mul <32 x i32> %load, %load - %gep2 = getelementptr <32 x i32>, <32 x i32> addrspace(1)* %out, i32 %tid - store <32 x i32> %mul, <32 x i32> addrspace(1)* %gep2 + %gep2 = getelementptr <32 x i32>, ptr addrspace(1) %out, i32 %tid + store <32 x i32> %mul, ptr addrspace(1) %gep2 ; 8 VMEM read call void @llvm.amdgcn.sched.group.barrier(i32 32, i32 8, i32 0) ; 30 VALU @@ -175,7 +175,7 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_READ_VALU_WRITE(<32 ret void } -define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VALU(<32 x i32> addrspace(1)* noalias %in, <32 x i32> addrspace(1)* noalias %out) #0 { +define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VALU(ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %out) #0 { ; GCN-LABEL: test_sched_group_barrier_pipeline_alternating_READ_VALU: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -336,11 +336,11 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VA ; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000040) size(8) SyncID(0) ; EXACTCUTOFF-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #2 - %gep1 = getelementptr <32 x i32>, <32 x i32> addrspace(1)* %in, i32 %tid - %load = load <32 x i32>, <32 x i32> addrspace(1)* %gep1 + %gep1 = getelementptr <32 x i32>, ptr addrspace(1) %in, i32 %tid + %load = load <32 x i32>, ptr addrspace(1) %gep1 %mul = mul <32 x i32> %load, %load - %gep2 = getelementptr <32 x i32>, <32 x i32> addrspace(1)* %out, i32 %tid - store <32 x i32> %mul, <32 x i32> addrspace(1)* %gep2 + %gep2 = getelementptr <32 x i32>, ptr addrspace(1) %out, i32 %tid + store <32 x i32> %mul, ptr addrspace(1) %gep2 ; 1 VMEM read call void @llvm.amdgcn.sched.group.barrier(i32 32, i32 1, i32 0) ; 2 VALU @@ -378,7 +378,7 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VA ret void } -define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VALU_WRITE(<32 x i32> addrspace(1)* noalias %in, <32 x i32> addrspace(1)* noalias %out) #0 { +define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VALU_WRITE(ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %out) #0 { ; GCN-LABEL: test_sched_group_barrier_pipeline_alternating_READ_VALU_WRITE: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -555,11 +555,11 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VA ; EXACTCUTOFF-NEXT: ; sched_group_barrier mask(0x00000040) size(1) SyncID(0) ; EXACTCUTOFF-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() #2 - %gep1 = getelementptr <32 x i32>, <32 x i32> addrspace(1)* %in, i32 %tid - %load = load <32 x i32>, <32 x i32> addrspace(1)* %gep1 + %gep1 = getelementptr <32 x i32>, ptr addrspace(1) %in, i32 %tid + %load = load <32 x i32>, ptr addrspace(1) %gep1 %mul = mul <32 x i32> %load, 
%load - %gep2 = getelementptr <32 x i32>, <32 x i32> addrspace(1)* %out, i32 %tid - store <32 x i32> %mul, <32 x i32> addrspace(1)* %gep2 + %gep2 = getelementptr <32 x i32>, ptr addrspace(1) %out, i32 %tid + store <32 x i32> %mul, ptr addrspace(1) %gep2 ; 1 VMEM read call void @llvm.amdgcn.sched.group.barrier(i32 32, i32 1, i32 0) ; 2 VALU @@ -611,7 +611,7 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_alternating_READ_VA ret void } -define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_cluster(<32 x float> addrspace(3)* noalias %in, <32 x float> addrspace(3)* noalias %out) #0 { +define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_cluster(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 { ; GCN-LABEL: test_sched_group_barrier_pipeline_MFMA_cluster: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -825,31 +825,31 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_cluster(<32 x ; EXACTCUTOFF-NEXT: s_endpgm entry: %idx = call i32 @llvm.amdgcn.workitem.id.x() - %load.0.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %in, i32 %idx - %load.0 = load <32 x float>, <32 x float> addrspace(3)* %load.0.addr - %load.1.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.0.addr, i32 64 - %load.1 = load <32 x float>, <32 x float> addrspace(3)* %load.1.addr - %load.2.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.1.addr, i32 128 - %load.2 = load <32 x float>, <32 x float> addrspace(3)* %load.2.addr - %load.3.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.2.addr, i32 192 - %load.3 = load <32 x float>, <32 x float> addrspace(3)* %load.3.addr - %load.4.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.3.addr, i32 256 - %load.4 = load <32 x float>, <32 x float> addrspace(3)* %load.4.addr + %load.0.addr = getelementptr <32 x float>, ptr addrspace(3) %in, i32 %idx + %load.0 = load <32 x float>, ptr addrspace(3) %load.0.addr + %load.1.addr = getelementptr <32 x float>, ptr addrspace(3) %load.0.addr, i32 64 + %load.1 = load <32 x float>, ptr addrspace(3) %load.1.addr + %load.2.addr = getelementptr <32 x float>, ptr addrspace(3) %load.1.addr, i32 128 + %load.2 = load <32 x float>, ptr addrspace(3) %load.2.addr + %load.3.addr = getelementptr <32 x float>, ptr addrspace(3) %load.2.addr, i32 192 + %load.3 = load <32 x float>, ptr addrspace(3) %load.3.addr + %load.4.addr = getelementptr <32 x float>, ptr addrspace(3) %load.3.addr, i32 256 + %load.4 = load <32 x float>, ptr addrspace(3) %load.4.addr %mai.0 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.0, i32 0, i32 0, i32 0) %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.1, i32 0, i32 0, i32 0) %mai.2 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.2, i32 0, i32 0, i32 0) %mai.3 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.3, i32 0, i32 0, i32 0) %mai.4 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.4, i32 0, i32 0, i32 0) - %store.0.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 %idx - store <32 x float> %mai.0, <32 x float> addrspace(3)* %store.0.addr - %store.1.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 64 - store <32 x float> %mai.1, <32 x float> addrspace(3)* 
%store.1.addr - %store.2.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 128 - store <32 x float> %mai.2, <32 x float> addrspace(3)* %store.2.addr - %store.3.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 192 - store <32 x float> %mai.3, <32 x float> addrspace(3)* %store.3.addr - %store.4.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 256 - store <32 x float> %mai.4, <32 x float> addrspace(3)* %store.4.addr + %store.0.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 %idx + store <32 x float> %mai.0, ptr addrspace(3) %store.0.addr + %store.1.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 64 + store <32 x float> %mai.1, ptr addrspace(3) %store.1.addr + %store.2.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 128 + store <32 x float> %mai.2, ptr addrspace(3) %store.2.addr + %store.3.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 192 + store <32 x float> %mai.3, ptr addrspace(3) %store.3.addr + %store.4.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 256 + store <32 x float> %mai.4, ptr addrspace(3) %store.4.addr ; 40 DS read call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 40, i32 0) ; 5 MFMA @@ -859,7 +859,7 @@ entry: ret void } -define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(<32 x float> addrspace(3)* noalias %in, <32 x float> addrspace(3)* noalias %out) #0 { +define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 { ; GCN-LABEL: test_sched_group_barrier_pipeline_MFMA_interleave: ; GCN: ; %bb.0: ; %entry ; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -1127,31 +1127,31 @@ define amdgpu_kernel void @test_sched_group_barrier_pipeline_MFMA_interleave(<32 ; EXACTCUTOFF-NEXT: s_endpgm entry: %idx = call i32 @llvm.amdgcn.workitem.id.x() - %load.0.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %in, i32 %idx - %load.0 = load <32 x float>, <32 x float> addrspace(3)* %load.0.addr - %load.1.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.0.addr, i32 64 - %load.1 = load <32 x float>, <32 x float> addrspace(3)* %load.1.addr - %load.2.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.1.addr, i32 128 - %load.2 = load <32 x float>, <32 x float> addrspace(3)* %load.2.addr - %load.3.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.2.addr, i32 192 - %load.3 = load <32 x float>, <32 x float> addrspace(3)* %load.3.addr - %load.4.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.3.addr, i32 256 - %load.4 = load <32 x float>, <32 x float> addrspace(3)* %load.4.addr + %load.0.addr = getelementptr <32 x float>, ptr addrspace(3) %in, i32 %idx + %load.0 = load <32 x float>, ptr addrspace(3) %load.0.addr + %load.1.addr = getelementptr <32 x float>, ptr addrspace(3) %load.0.addr, i32 64 + %load.1 = load <32 x float>, ptr addrspace(3) %load.1.addr + %load.2.addr = getelementptr <32 x float>, ptr addrspace(3) %load.1.addr, i32 128 + %load.2 = load <32 x float>, ptr addrspace(3) %load.2.addr + %load.3.addr = getelementptr <32 x float>, ptr addrspace(3) %load.2.addr, i32 192 + %load.3 = load <32 x float>, ptr addrspace(3) %load.3.addr + %load.4.addr = getelementptr <32 x float>, ptr addrspace(3) %load.3.addr, i32 256 + %load.4 = load <32 x float>, ptr addrspace(3) %load.4.addr %mai.0 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x 
float> %load.0, i32 0, i32 0, i32 0) %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.1, i32 0, i32 0, i32 0) %mai.2 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.2, i32 0, i32 0, i32 0) %mai.3 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.3, i32 0, i32 0, i32 0) %mai.4 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.4, i32 0, i32 0, i32 0) - %store.0.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 %idx - store <32 x float> %mai.0, <32 x float> addrspace(3)* %store.0.addr - %store.1.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 64 - store <32 x float> %mai.1, <32 x float> addrspace(3)* %store.1.addr - %store.2.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 128 - store <32 x float> %mai.2, <32 x float> addrspace(3)* %store.2.addr - %store.3.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 192 - store <32 x float> %mai.3, <32 x float> addrspace(3)* %store.3.addr - %store.4.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 256 - store <32 x float> %mai.4, <32 x float> addrspace(3)* %store.4.addr + %store.0.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 %idx + store <32 x float> %mai.0, ptr addrspace(3) %store.0.addr + %store.1.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 64 + store <32 x float> %mai.1, ptr addrspace(3) %store.1.addr + %store.2.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 128 + store <32 x float> %mai.2, ptr addrspace(3) %store.2.addr + %store.3.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 192 + store <32 x float> %mai.3, ptr addrspace(3) %store.3.addr + %store.4.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 256 + store <32 x float> %mai.4, ptr addrspace(3) %store.4.addr ; 8 DS read call void @llvm.amdgcn.sched.group.barrier(i32 256, i32 8, i32 0) ; 1 MFMA diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll index 241c0decc21944..2ec4fb7047487e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot2.ll @@ -10,16 +10,16 @@ declare i32 @llvm.amdgcn.sdot2(<2 x i16> %a, <2 x i16> %b, i32 %c, i1 %clamp) ; GFX908: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} ; GFX10: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} define amdgpu_kernel void @test_llvm_amdgcn_sdot2_clamp( - i32 addrspace(1)* %r, - <2 x i16> addrspace(1)* %a, - <2 x i16> addrspace(1)* %b, - i32 addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { entry: - %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a - %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b - %c.val = load i32, i32 addrspace(1)* %c + %a.val = load <2 x i16>, ptr addrspace(1) %a + %b.val = load <2 x i16>, ptr addrspace(1) %b + %c.val = load i32, ptr addrspace(1) %c %r.val = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val, i1 1) - store i32 %r.val, i32 addrspace(1)* %r + store i32 %r.val, ptr addrspace(1) %r ret void } @@ -28,15 +28,15 @@ entry: ; GFX908: v_dot2c_i32_i16_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} ; GFX10: v_dot2_i32_i16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, 
v{{[0-9]+}}{{$}} define amdgpu_kernel void @test_llvm_amdgcn_sdot2_no_clamp( - i32 addrspace(1)* %r, - <2 x i16> addrspace(1)* %a, - <2 x i16> addrspace(1)* %b, - i32 addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { entry: - %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a - %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b - %c.val = load i32, i32 addrspace(1)* %c + %a.val = load <2 x i16>, ptr addrspace(1) %a + %b.val = load <2 x i16>, ptr addrspace(1) %b + %c.val = load i32, ptr addrspace(1) %c %r.val = call i32 @llvm.amdgcn.sdot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val, i1 0) - store i32 %r.val, i32 addrspace(1)* %r + store i32 %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll index 41661015f9033a..aa20f3546d6520 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot4.ll @@ -10,18 +10,18 @@ declare i32 @llvm.amdgcn.sdot4(i32 %a, i32 %b, i32 %c, i1 %clamp) ; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} ; GFX10: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} define amdgpu_kernel void @test_llvm_amdgcn_sdot4_clamp( - i32 addrspace(1)* %r, - <4 x i8> addrspace(1)* %a, - <4 x i8> addrspace(1)* %b, - i32 addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { entry: - %a.val = load <4 x i8>, <4 x i8> addrspace(1)* %a - %b.val = load <4 x i8>, <4 x i8> addrspace(1)* %b + %a.val = load <4 x i8>, ptr addrspace(1) %a + %b.val = load <4 x i8>, ptr addrspace(1) %b %a.val.cast = bitcast <4 x i8> %a.val to i32 %b.val.cast = bitcast <4 x i8> %b.val to i32 - %c.val = load i32, i32 addrspace(1)* %c + %c.val = load i32, ptr addrspace(1) %c %r.val = call i32 @llvm.amdgcn.sdot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 1) - store i32 %r.val, i32 addrspace(1)* %r + store i32 %r.val, ptr addrspace(1) %r ret void } @@ -29,17 +29,17 @@ entry: ; GFX906: v_dot4_i32_i8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}} ; GFX10: v_dot4c_i32_i8_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @test_llvm_amdgcn_sdot4_no_clamp( - i32 addrspace(1)* %r, - <4 x i8> addrspace(1)* %a, - <4 x i8> addrspace(1)* %b, - i32 addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { entry: - %a.val = load <4 x i8>, <4 x i8> addrspace(1)* %a - %b.val = load <4 x i8>, <4 x i8> addrspace(1)* %b + %a.val = load <4 x i8>, ptr addrspace(1) %a + %b.val = load <4 x i8>, ptr addrspace(1) %b %a.val.cast = bitcast <4 x i8> %a.val to i32 %b.val.cast = bitcast <4 x i8> %b.val to i32 - %c.val = load i32, i32 addrspace(1)* %c + %c.val = load i32, ptr addrspace(1) %c %r.val = call i32 @llvm.amdgcn.sdot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 0) - store i32 %r.val, i32 addrspace(1)* %r + store i32 %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll index 948bc530c03988..a8cadaa8aaace0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sdot8.ll @@ -12,18 +12,18 @@ declare i32 @llvm.amdgcn.sdot8(i32 %a, i32 %b, i32 %c, i1 %clamp) ; GFX908: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} ; GFX10: v_dot8_i32_i4 v{{[0-9]+}}, 
s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}} define amdgpu_kernel void @test_llvm_amdgcn_sdot8_clamp( - i32 addrspace(1)* %r, - <8 x i4> addrspace(1)* %a, - <8 x i4> addrspace(1)* %b, - i32 addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { entry: - %a.val = load <8 x i4>, <8 x i4> addrspace(1)* %a - %b.val = load <8 x i4>, <8 x i4> addrspace(1)* %b + %a.val = load <8 x i4>, ptr addrspace(1) %a + %b.val = load <8 x i4>, ptr addrspace(1) %b %a.val.cast = bitcast <8 x i4> %a.val to i32 %b.val.cast = bitcast <8 x i4> %b.val to i32 - %c.val = load i32, i32 addrspace(1)* %c + %c.val = load i32, ptr addrspace(1) %c %r.val = call i32 @llvm.amdgcn.sdot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 1) - store i32 %r.val, i32 addrspace(1)* %r + store i32 %r.val, ptr addrspace(1) %r ret void } @@ -32,17 +32,17 @@ entry: ; GFX908: v_dot8c_i32_i4_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} ; GFX10: v_dot8_i32_i4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}} define amdgpu_kernel void @test_llvm_amdgcn_sdot8_no_clamp( - i32 addrspace(1)* %r, - <8 x i4> addrspace(1)* %a, - <8 x i4> addrspace(1)* %b, - i32 addrspace(1)* %c) { + ptr addrspace(1) %r, + ptr addrspace(1) %a, + ptr addrspace(1) %b, + ptr addrspace(1) %c) { entry: - %a.val = load <8 x i4>, <8 x i4> addrspace(1)* %a - %b.val = load <8 x i4>, <8 x i4> addrspace(1)* %b + %a.val = load <8 x i4>, ptr addrspace(1) %a + %b.val = load <8 x i4>, ptr addrspace(1) %b %a.val.cast = bitcast <8 x i4> %a.val to i32 %b.val.cast = bitcast <8 x i4> %b.val to i32 - %c.val = load i32, i32 addrspace(1)* %c + %c.val = load i32, ptr addrspace(1) %c %r.val = call i32 @llvm.amdgcn.sdot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 0) - store i32 %r.val, i32 addrspace(1)* %r + store i32 %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll index ae42d68e79077e..5cc5963ac64d81 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll @@ -2,7 +2,7 @@ ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG %s ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL %s -define amdgpu_kernel void @test_get_doorbell(i32 addrspace(1)* %out) { +define amdgpu_kernel void @test_get_doorbell(ptr addrspace(1) %out) { ; GFX11-SDAG-LABEL: test_get_doorbell: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -23,11 +23,11 @@ define amdgpu_kernel void @test_get_doorbell(i32 addrspace(1)* %out) { ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %ret = call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 128) - store i32 %ret, i32 addrspace(1)* %out + store i32 %ret, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @test_get_ddid(i32 addrspace(1)* %out) { +define amdgpu_kernel void @test_get_ddid(ptr addrspace(1) %out) { ; GFX11-SDAG-LABEL: test_get_ddid: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -48,11 +48,11 @@ define amdgpu_kernel void @test_get_ddid(i32 addrspace(1)* %out) { ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %ret = call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 129) - store i32 %ret, i32 addrspace(1)* %out + store i32 %ret, ptr addrspace(1) %out 
ret void } -define amdgpu_kernel void @test_get_tma(i64 addrspace(1)* %out) { +define amdgpu_kernel void @test_get_tma(ptr addrspace(1) %out) { ; GFX11-LABEL: test_get_tma: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -64,11 +64,11 @@ define amdgpu_kernel void @test_get_tma(i64 addrspace(1)* %out) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 130) - store i64 %ret, i64 addrspace(1)* %out + store i64 %ret, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @test_get_realtime(i64 addrspace(1)* %out) { +define amdgpu_kernel void @test_get_realtime(ptr addrspace(1) %out) { ; GFX11-LABEL: test_get_realtime: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -80,11 +80,11 @@ define amdgpu_kernel void @test_get_realtime(i64 addrspace(1)* %out) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 131) - store i64 %ret, i64 addrspace(1)* %out + store i64 %ret, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @test_savewave(i32 addrspace(1)* %out) { +define amdgpu_kernel void @test_savewave(ptr addrspace(1) %out) { ; GFX11-SDAG-LABEL: test_savewave: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -105,11 +105,11 @@ define amdgpu_kernel void @test_savewave(i32 addrspace(1)* %out) { ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %ret = call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 132) - store i32 %ret, i32 addrspace(1)* %out + store i32 %ret, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @test_get_tba(i64 addrspace(1)* %out) { +define amdgpu_kernel void @test_get_tba(ptr addrspace(1) %out) { ; GFX11-LABEL: test_get_tba: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -121,11 +121,11 @@ define amdgpu_kernel void @test_get_tba(i64 addrspace(1)* %out) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 133) - store i64 %ret, i64 addrspace(1)* %out + store i64 %ret, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @test_get_0_i32(i32 addrspace(1)* %out) { +define amdgpu_kernel void @test_get_0_i32(ptr addrspace(1) %out) { ; GFX11-SDAG-LABEL: test_get_0_i32: ; GFX11-SDAG: ; %bb.0: ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -146,11 +146,11 @@ define amdgpu_kernel void @test_get_0_i32(i32 addrspace(1)* %out) { ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-GISEL-NEXT: s_endpgm %ret = call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 0) - store i32 %ret, i32 addrspace(1)* %out + store i32 %ret, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @test_get_99999_i64(i64 addrspace(1)* %out) { +define amdgpu_kernel void @test_get_99999_i64(ptr addrspace(1) %out) { ; GFX11-LABEL: test_get_99999_i64: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24 @@ -162,7 +162,7 @@ define amdgpu_kernel void @test_get_99999_i64(i64 addrspace(1)* %out) { ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 99999) - store i64 %ret, i64 addrspace(1)* %out + store i64 %ret, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll index 21a36c5ba5742b..6d1ea93adb0df0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll 
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll @@ -2,7 +2,7 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -early-live-intervals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s -define amdgpu_kernel void @set_inactive(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @set_inactive(ptr addrspace(1) %out, i32 %in) { ; GCN-LABEL: set_inactive: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c @@ -17,11 +17,11 @@ define amdgpu_kernel void @set_inactive(i32 addrspace(1)* %out, i32 %in) { ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GCN-NEXT: s_endpgm %tmp = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0 - store i32 %tmp, i32 addrspace(1)* %out + store i32 %tmp, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @set_inactive_64(i64 addrspace(1)* %out, i64 %in) { +define amdgpu_kernel void @set_inactive_64(ptr addrspace(1) %out, i64 %in) { ; GCN-LABEL: set_inactive_64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 @@ -39,11 +39,11 @@ define amdgpu_kernel void @set_inactive_64(i64 addrspace(1)* %out, i64 %in) { ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 ; GCN-NEXT: s_endpgm %tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0) #0 - store i64 %tmp, i64 addrspace(1)* %out + store i64 %tmp, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4 x i32> inreg %desc) { +define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x i32> inreg %desc) { ; GCN-LABEL: set_inactive_scc: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 @@ -82,12 +82,12 @@ define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4 br i1 %cmp, label %.zero, label %.one .zero: - store i32 %tmp, i32 addrspace(1)* %out + store i32 %tmp, ptr addrspace(1) %out br label %.exit .one: %tmp.1 = add i32 %tmp, 1 - store i32 %tmp.1, i32 addrspace(1)* %out + store i32 %tmp.1, ptr addrspace(1) %out br label %.exit .exit: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll index d29ae5ba4374c3..1eb4675919a4f9 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll @@ -8,9 +8,9 @@ declare i32 @llvm.amdgcn.sffbh.i32(i32) #1 ; GCN: s_flbit_i32 [[SRESULT:s[0-9]+]], [[VAL]] ; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] ; GCN: buffer_store_dword [[VRESULT]], -define amdgpu_kernel void @s_flbit(i32 addrspace(1)* noalias %out, i32 %val) #0 { +define amdgpu_kernel void @s_flbit(ptr addrspace(1) noalias %out, i32 %val) #0 { %r = call i32 @llvm.amdgcn.sffbh.i32(i32 %val) - store i32 %r, i32 addrspace(1)* %out, align 4 + store i32 %r, ptr addrspace(1) %out, align 4 ret void } @@ -18,10 +18,10 @@ define amdgpu_kernel void @s_flbit(i32 addrspace(1)* noalias %out, i32 %val) #0 ; GCN: buffer_load_dword [[VAL:v[0-9]+]], ; GCN: v_ffbh_i32_e32 [[RESULT:v[0-9]+]], [[VAL]] ; GCN: buffer_store_dword [[RESULT]], -define amdgpu_kernel void @v_flbit(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #0 { - %val = load i32, i32 addrspace(1)* %valptr, align 4 +define amdgpu_kernel void @v_flbit(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %valptr) #0 { + %val = load i32, ptr addrspace(1) %valptr, align 4 %r = call i32 @llvm.amdgcn.sffbh.i32(i32 %val) - store i32 %r, i32 
addrspace(1)* %out, align 4 + store i32 %r, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.f16.ll index 4b930bfa210cd2..238604f7be6640 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.f16.ll @@ -8,11 +8,11 @@ declare half @llvm.amdgcn.sin.f16(half %a) ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define amdgpu_kernel void @sin_f16( - half addrspace(1)* %r, - half addrspace(1)* %a) { + ptr addrspace(1) %r, + ptr addrspace(1) %a) { entry: - %a.val = load half, half addrspace(1)* %a + %a.val = load half, ptr addrspace(1) %a %r.val = call half @llvm.amdgcn.sin.f16(half %a.val) - store half %r.val, half addrspace(1)* %r + store half %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll index 06d24734ed622c..a605e31b60fdc5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sin.ll @@ -5,9 +5,9 @@ declare float @llvm.amdgcn.sin.f32(float) #0 ; GCN-LABEL: {{^}}v_sin_f32: ; GCN: v_sin_f32_e32 {{v[0-9]+}}, {{s[0-9]+}} -define amdgpu_kernel void @v_sin_f32(float addrspace(1)* %out, float %src) #1 { +define amdgpu_kernel void @v_sin_f32(ptr addrspace(1) %out, float %src) #1 { %sin = call float @llvm.amdgcn.sin.f32(float %src) #0 - store float %sin, float addrspace(1)* %out + store float %sin, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll index 4e89591d5f2e1a..3446dcd5f94546 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.ll @@ -122,10 +122,10 @@ main_body: ;CHECK-LABEL: {{^}}buffer_load_v4i32_tfe: ;CHECK: buffer_load_format_xyzw v[2:6], {{v[0-9]+}}, s[0:3], 0 idxen tfe ;CHECK: s_waitcnt -define amdgpu_cs float @buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, <4 x i32> addrspace(1)* %out) { +define amdgpu_cs float @buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) { %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <4 x i32>, i32 } %load, 0 - store <4 x i32> %data, <4 x i32> addrspace(1)* %out + store <4 x i32> %data, ptr addrspace(1) %out %status = extractvalue { <4 x i32>, i32 } %load, 1 %fstatus = bitcast i32 %status to float ret float %fstatus @@ -134,10 +134,10 @@ define amdgpu_cs float @buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, <4 x i32> a ;CHECK-LABEL: {{^}}buffer_load_v4f32_tfe: ;CHECK: buffer_load_format_xyzw v[2:6], {{v[0-9]+}}, s[0:3], 0 idxen tfe ;CHECK: s_waitcnt -define amdgpu_cs float @buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, <4 x float> addrspace(1)* %out) { +define amdgpu_cs float @buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) { %load = call { <4 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <4 x float>, i32 } %load, 0 - store <4 x float> %data, <4 x float> addrspace(1)* %out + store <4 x float> %data, ptr addrspace(1) %out %status = extractvalue { <4 x float>, i32 } %load, 1 %fstatus = bitcast i32 %status to float ret float %fstatus @@ -146,10 +146,10 @@ define amdgpu_cs float @buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, <4 x 
float> ;CHECK-LABEL: {{^}}buffer_load_v3i32_tfe: ;CHECK: buffer_load_format_xyz v[2:5], {{v[0-9]+}}, s[0:3], 0 idxen tfe ;CHECK: s_waitcnt -define amdgpu_cs float @buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, <3 x i32> addrspace(1)* %out) { +define amdgpu_cs float @buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) { %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <3 x i32>, i32 } %load, 0 - store <3 x i32> %data, <3 x i32> addrspace(1)* %out + store <3 x i32> %data, ptr addrspace(1) %out %status = extractvalue { <3 x i32>, i32 } %load, 1 %fstatus = bitcast i32 %status to float ret float %fstatus @@ -158,10 +158,10 @@ define amdgpu_cs float @buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, <3 x i32> a ;CHECK-LABEL: {{^}}buffer_load_v3f32_tfe: ;CHECK: buffer_load_format_xyz v[2:5], {{v[0-9]+}}, s[0:3], 0 idxen tfe ;CHECK: s_waitcnt -define amdgpu_cs float @buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, <3 x float> addrspace(1)* %out) { +define amdgpu_cs float @buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) { %load = call { <3 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <3 x float>, i32 } %load, 0 - store <3 x float> %data, <3 x float> addrspace(1)* %out + store <3 x float> %data, ptr addrspace(1) %out %status = extractvalue { <3 x float>, i32 } %load, 1 %fstatus = bitcast i32 %status to float ret float %fstatus @@ -171,10 +171,10 @@ define amdgpu_cs float @buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, <3 x float> ;GFX6: buffer_load_format_xyz v[2:5], {{v[0-9]+}}, s[0:3], 0 idxen tfe ;GFX8PLUS: buffer_load_format_xy v[2:4], {{v[0-9]+}}, s[0:3], 0 idxen tfe ;CHECK: s_waitcnt -define amdgpu_cs float @buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, <2 x i32> addrspace(1)* %out) { +define amdgpu_cs float @buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) { %load = call { <2 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <2 x i32>, i32 } %load, 0 - store <2 x i32> %data, <2 x i32> addrspace(1)* %out + store <2 x i32> %data, ptr addrspace(1) %out %status = extractvalue { <2 x i32>, i32 } %load, 1 %fstatus = bitcast i32 %status to float ret float %fstatus @@ -184,10 +184,10 @@ define amdgpu_cs float @buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, <2 x i32> a ;GFX6: buffer_load_format_xyz v[2:5], {{v[0-9]+}}, s[0:3], 0 idxen tfe ;GFX8PLUS: buffer_load_format_xy v[2:4], {{v[0-9]+}}, s[0:3], 0 idxen tfe ;CHECK: s_waitcnt -define amdgpu_cs float @buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, <2 x float> addrspace(1)* %out) { +define amdgpu_cs float @buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) { %load = call { <2 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { <2 x float>, i32 } %load, 0 - store <2 x float> %data, <2 x float> addrspace(1)* %out + store <2 x float> %data, ptr addrspace(1) %out %status = extractvalue { <2 x float>, i32 } %load, 1 %fstatus = bitcast i32 %status to float ret float %fstatus @@ -196,10 +196,10 @@ define amdgpu_cs float @buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, <2 x float> ;CHECK-LABEL: {{^}}buffer_load_i32_tfe: ;CHECK: buffer_load_format_x v[2:3], {{v[0-9]+}}, s[0:3], 0 idxen tfe ;CHECK: s_waitcnt -define amdgpu_cs float 
@buffer_load_i32_tfe(<4 x i32> inreg %rsrc, i32 addrspace(1)* %out) { +define amdgpu_cs float @buffer_load_i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) { %load = call { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { i32, i32 } %load, 0 - store i32 %data, i32 addrspace(1)* %out + store i32 %data, ptr addrspace(1) %out %status = extractvalue { i32, i32 } %load, 1 %fstatus = bitcast i32 %status to float ret float %fstatus @@ -208,10 +208,10 @@ define amdgpu_cs float @buffer_load_i32_tfe(<4 x i32> inreg %rsrc, i32 addrspace ;CHECK-LABEL: {{^}}buffer_load_f32_tfe: ;CHECK: buffer_load_format_x v[2:3], {{v[0-9]+}}, s[0:3], 0 idxen tfe ;CHECK: s_waitcnt -define amdgpu_cs float @buffer_load_f32_tfe(<4 x i32> inreg %rsrc, float addrspace(1)* %out) { +define amdgpu_cs float @buffer_load_f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) { %load = call { float, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) %data = extractvalue { float, i32 } %load, 0 - store float %data, float addrspace(1)* %out + store float %data, ptr addrspace(1) %out %status = extractvalue { float, i32 } %load, 1 %fstatus = bitcast i32 %status to float ret float %fstatus diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll index 3ddff95baf99e5..1a7fd5efb48703 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll @@ -94,11 +94,11 @@ bb: %i7 = zext i16 %i4 to i32 %i8 = zext i16 %i6 to i32 %i9 = add nuw nsw i32 0, 7 - %i10 = getelementptr [0 x i32], [0 x i32] addrspace(3)* @esgs_ring, i32 0, i32 %i9 - store i32 %i7, i32 addrspace(3)* %i10, align 4 + %i10 = getelementptr [0 x i32], ptr addrspace(3) @esgs_ring, i32 0, i32 %i9 + store i32 %i7, ptr addrspace(3) %i10, align 4 %i11 = add nuw nsw i32 0, 8 - %i12 = getelementptr [0 x i32], [0 x i32] addrspace(3)* @esgs_ring, i32 0, i32 %i11 - store i32 %i8, i32 addrspace(3)* %i12, align 4 + %i12 = getelementptr [0 x i32], ptr addrspace(3) @esgs_ring, i32 0, i32 %i11 + store i32 %i8, ptr addrspace(3) %i12, align 4 unreachable } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.lds.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.lds.ll index b94ba8334b34d1..b0ece853db1b62 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.lds.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.lds.ll @@ -2,9 +2,9 @@ ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,SDAG ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=GCN,GISEL -declare void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* nocapture, i32 %size, i32 %vindex, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux) +declare void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) nocapture, i32 %size, i32 %vindex, i32 %voffset, i32 %soffset, i32 %offset, i32 %aux) -define amdgpu_ps float @buffer_load_lds_dword(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds) { +define amdgpu_ps float @buffer_load_lds_dword(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds) { ; SDAG-LABEL: buffer_load_lds_dword: ; SDAG: ; %bb.0: ; %main_body ; SDAG-NEXT: v_mov_b32_e32 v0, 8 @@ -32,15 +32,14 @@ 
define amdgpu_ps float @buffer_load_lds_dword(<4 x i32> inreg %rsrc, i8 addrspac ; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: ; return to shader part epilog main_body: - call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 8, i32 0, i32 0, i32 0, i32 0) - call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 8, i32 0, i32 0, i32 4, i32 1) - call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 8, i32 0, i32 0, i32 8, i32 2) - %ptr = bitcast i8 addrspace(3)* %lds to float addrspace(3)* - %res = load float, float addrspace(3)* %ptr + call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 4, i32 1) + call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 8, i32 0, i32 0, i32 8, i32 2) + %res = load float, ptr addrspace(3) %lds ret float %res } -define amdgpu_ps void @buffer_load_lds_dword_imm_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex) { +define amdgpu_ps void @buffer_load_lds_dword_imm_offset(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex) { ; GCN-LABEL: buffer_load_lds_dword_imm_offset: ; GCN: ; %bb.0: ; %main_body ; GCN-NEXT: s_mov_b32 m0, s4 @@ -48,11 +47,11 @@ define amdgpu_ps void @buffer_load_lds_dword_imm_offset(<4 x i32> inreg %rsrc, i ; GCN-NEXT: buffer_load_dword v0, s[0:3], 0 idxen offset:2048 lds ; GCN-NEXT: s_endpgm main_body: - call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %vindex, i32 0, i32 0, i32 2048, i32 0) + call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 %vindex, i32 0, i32 0, i32 2048, i32 0) ret void } -define amdgpu_ps void @buffer_load_lds_dword_v_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex, i32 %voffset) { +define amdgpu_ps void @buffer_load_lds_dword_v_offset(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset) { ; GCN-LABEL: buffer_load_lds_dword_v_offset: ; GCN: ; %bb.0: ; %main_body ; GCN-NEXT: s_mov_b32 m0, s4 @@ -60,11 +59,11 @@ define amdgpu_ps void @buffer_load_lds_dword_v_offset(<4 x i32> inreg %rsrc, i8 ; GCN-NEXT: buffer_load_dword v[0:1], s[0:3], 0 idxen offen lds ; GCN-NEXT: s_endpgm main_body: - call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %vindex, i32 %voffset, i32 0, i32 0, i32 0) + call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 %vindex, i32 %voffset, i32 0, i32 0, i32 0) ret void } -define amdgpu_ps void @buffer_load_lds_dword_s_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex, i32 inreg %soffset) { +define amdgpu_ps void @buffer_load_lds_dword_s_offset(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 inreg %soffset) { ; GCN-LABEL: buffer_load_lds_dword_s_offset: ; GCN: ; %bb.0: ; %main_body ; GCN-NEXT: s_mov_b32 m0, s4 @@ -72,11 +71,11 @@ define amdgpu_ps void @buffer_load_lds_dword_s_offset(<4 x i32> inreg %rsrc, i8 ; GCN-NEXT: buffer_load_dword v0, s[0:3], s5 idxen lds ; GCN-NEXT: s_endpgm main_body: - call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %vindex, i32 0, i32 %soffset, i32 0, i32 0) + call 
void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 %vindex, i32 0, i32 %soffset, i32 0, i32 0)
 ret void
 }

-define amdgpu_ps void @buffer_load_lds_dword_vs_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
+define amdgpu_ps void @buffer_load_lds_dword_vs_offset(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
 ; GCN-LABEL: buffer_load_lds_dword_vs_offset:
 ; GCN: ; %bb.0: ; %main_body
 ; GCN-NEXT: s_mov_b32 m0, s4
@@ -84,11 +83,11 @@ define amdgpu_ps void @buffer_load_lds_dword_vs_offset(<4 x i32> inreg %rsrc, i8
 ; GCN-NEXT: buffer_load_dword v[0:1], s[0:3], s5 idxen offen lds
 ; GCN-NEXT: s_endpgm
 main_body:
- call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %vindex, i32 %voffset, i32 %soffset, i32 0, i32 0)
+ call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 %vindex, i32 %voffset, i32 %soffset, i32 0, i32 0)
 ret void
 }

-define amdgpu_ps void @buffer_load_lds_dword_vs_imm_offset(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
+define amdgpu_ps void @buffer_load_lds_dword_vs_imm_offset(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
 ; GCN-LABEL: buffer_load_lds_dword_vs_imm_offset:
 ; GCN: ; %bb.0: ; %main_body
 ; GCN-NEXT: s_mov_b32 m0, s4
@@ -96,11 +95,11 @@ define amdgpu_ps void @buffer_load_lds_dword_vs_imm_offset(<4 x i32> inreg %rsrc
 ; GCN-NEXT: buffer_load_dword v[0:1], s[0:3], s5 idxen offen offset:2048 lds
 ; GCN-NEXT: s_endpgm
 main_body:
- call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 4, i32 %vindex, i32 %voffset, i32 %soffset, i32 2048, i32 0)
+ call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 4, i32 %vindex, i32 %voffset, i32 %soffset, i32 2048, i32 0)
 ret void
 }

-define amdgpu_ps void @buffer_load_lds_ushort(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex) {
+define amdgpu_ps void @buffer_load_lds_ushort(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex) {
 ; GCN-LABEL: buffer_load_lds_ushort:
 ; GCN: ; %bb.0: ; %main_body
 ; GCN-NEXT: v_mov_b32_e32 v1, 0x800
@@ -109,11 +108,11 @@ define amdgpu_ps void @buffer_load_lds_ushort(<4 x i32> inreg %rsrc, i8 addrspac
 ; GCN-NEXT: buffer_load_ushort v[0:1], s[0:3], 0 idxen offen lds
 ; GCN-NEXT: s_endpgm
 main_body:
- call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 2, i32 %vindex, i32 2048, i32 0, i32 0, i32 0)
+ call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 2, i32 %vindex, i32 2048, i32 0, i32 0, i32 0)
 ret void
 }

-define amdgpu_ps void @buffer_load_lds_ubyte(<4 x i32> inreg %rsrc, i8 addrspace(3)* inreg %lds, i32 %vindex) {
+define amdgpu_ps void @buffer_load_lds_ubyte(<4 x i32> inreg %rsrc, ptr addrspace(3) inreg %lds, i32 %vindex) {
 ; GCN-LABEL: buffer_load_lds_ubyte:
 ; GCN: ; %bb.0: ; %main_body
 ; GCN-NEXT: s_mov_b32 m0, s4
@@ -121,6 +120,6 @@ define amdgpu_ps void @buffer_load_lds_ubyte(<4 x i32> inreg %rsrc, i8 addrspace
 ; GCN-NEXT: buffer_load_ubyte v0, s[0:3], 0 idxen offset:2048 lds
 ; GCN-NEXT: s_endpgm
 main_body:
- call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, i8 addrspace(3)* %lds, i32 1, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
+ call void @llvm.amdgcn.struct.buffer.load.lds(<4 x i32> %rsrc, ptr addrspace(3) %lds, i32 1, i32 %vindex, i32 0, i32 0, i32 2048, i32 0)
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
index 84e0f28baec6a1..6fc304cc5ffc89 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
@@ -116,12 +116,12 @@ main_body:
 ; CHECK-LABEL: buffer_load_mmo:
 ; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
 ; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
-define amdgpu_ps float @buffer_load_mmo(<4 x i32> inreg %rsrc, float addrspace(3)* %lds) {
+define amdgpu_ps float @buffer_load_mmo(<4 x i32> inreg %rsrc, ptr addrspace(3) %lds) {
 entry:
- store float 0.0, float addrspace(3)* %lds
+ store float 0.0, ptr addrspace(3) %lds
 %val = call float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
- %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
- store float 0.0, float addrspace(3)* %tmp2
+ %tmp2 = getelementptr float, ptr addrspace(3) %lds, i32 4
+ store float 0.0, ptr addrspace(3) %tmp2
 ret float %val
 }

@@ -205,10 +205,10 @@ main_body:
 ;CHECK-NEXT: buffer_load_ushort [[VAL:v[0-9]+]], v1, s[0:3], 0 idxen
 ;CHECK: s_waitcnt vmcnt(0)
 ;CHECK: ds_write_b16 v0, [[VAL]]
-define amdgpu_ps void @struct_buffer_load_f16(<4 x i32> inreg %rsrc, half addrspace(3)* %ptr, i32 %idx) {
+define amdgpu_ps void @struct_buffer_load_f16(<4 x i32> inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
 main_body:
 %val = call half @llvm.amdgcn.struct.buffer.load.f16(<4 x i32> %rsrc, i32 %idx, i32 0, i32 0, i32 0)
- store half %val, half addrspace(3)* %ptr
+ store half %val, ptr addrspace(3) %ptr
 ret void
 }

@@ -217,10 +217,10 @@ main_body:
 ;CHECK-NEXT: buffer_load_dword [[VAL:v[0-9]+]], v1, s[0:3], 0 idxen
 ;CHECK: s_waitcnt vmcnt(0)
 ;CHECK: ds_write_b32 v0, [[VAL]]
-define amdgpu_ps void @struct_buffer_load_v2f16(<4 x i32> inreg %rsrc, <2 x half> addrspace(3)* %ptr, i32 %idx) {
+define amdgpu_ps void @struct_buffer_load_v2f16(<4 x i32> inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
 main_body:
 %val = call <2 x half> @llvm.amdgcn.struct.buffer.load.v2f16(<4 x i32> %rsrc, i32 %idx, i32 0, i32 0, i32 0)
- store <2 x half> %val, <2 x half> addrspace(3)* %ptr
+ store <2 x half> %val, ptr addrspace(3) %ptr
 ret void
 }

@@ -229,10 +229,10 @@ main_body:
 ;CHECK-NEXT: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], v1, s[0:3], 0 idxen
 ;CHECK: s_waitcnt vmcnt(0)
 ;CHECK: ds_write_b64 v0, [[VAL]]
-define amdgpu_ps void @struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, <4 x half> addrspace(3)* %ptr, i32 %idx) {
+define amdgpu_ps void @struct_buffer_load_v4f16(<4 x i32> inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
 main_body:
 %val = call <4 x half> @llvm.amdgcn.struct.buffer.load.v4f16(<4 x i32> %rsrc, i32 %idx, i32 0, i32 0, i32 0)
- store <4 x half> %val, <4 x half> addrspace(3)* %ptr
+ store <4 x half> %val, ptr addrspace(3) %ptr
 ret void
 }

@@ -241,10 +241,10 @@ main_body:
 ;CHECK-NEXT: buffer_load_ushort [[VAL:v[0-9]+]], v1, s[0:3], 0 idxen
 ;CHECK: s_waitcnt vmcnt(0)
 ;CHECK: ds_write_b16 v0, [[VAL]]
-define amdgpu_ps void @struct_buffer_load_i16(<4 x i32> inreg %rsrc, i16 addrspace(3)* %ptr, i32 %idx) {
+define amdgpu_ps void @struct_buffer_load_i16(<4 x i32> inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
 main_body:
 %val = call i16 @llvm.amdgcn.struct.buffer.load.i16(<4 x i32> %rsrc, i32 %idx, i32 0, i32 0, i32 0)
- store i16 %val, i16 addrspace(3)* %ptr
+ store i16 %val, ptr addrspace(3) %ptr
 ret void
 }

@@ -253,10 +253,10 @@ main_body:
 ;CHECK-NEXT: buffer_load_dword [[VAL:v[0-9]+]], v1, s[0:3], 0 idxen
 ;CHECK: s_waitcnt vmcnt(0)
 ;CHECK: ds_write_b32 v0, [[VAL]]
-define amdgpu_ps void @struct_buffer_load_v2i16(<4 x i32> inreg %rsrc, <2 x i16> addrspace(3)* %ptr, i32 %idx) {
+define amdgpu_ps void @struct_buffer_load_v2i16(<4 x i32> inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
 main_body:
 %val = call <2 x i16> @llvm.amdgcn.struct.buffer.load.v2i16(<4 x i32> %rsrc, i32 %idx, i32 0, i32 0, i32 0)
- store <2 x i16> %val, <2 x i16> addrspace(3)* %ptr
+ store <2 x i16> %val, ptr addrspace(3) %ptr
 ret void
 }

@@ -265,10 +265,10 @@ main_body:
 ;CHECK-NEXT: buffer_load_dwordx2 [[VAL:v\[[0-9]+:[0-9]+\]]], v1, s[0:3], 0 idxen
 ;CHECK: s_waitcnt vmcnt(0)
 ;CHECK: ds_write_b64 v0, [[VAL]]
-define amdgpu_ps void @struct_buffer_load_v4i16(<4 x i32> inreg %rsrc, <4 x i16> addrspace(3)* %ptr, i32 %idx) {
+define amdgpu_ps void @struct_buffer_load_v4i16(<4 x i32> inreg %rsrc, ptr addrspace(3) %ptr, i32 %idx) {
 main_body:
 %val = call <4 x i16> @llvm.amdgcn.struct.buffer.load.v4i16(<4 x i32> %rsrc, i32 %idx, i32 0, i32 0, i32 0)
- store <4 x i16> %val, <4 x i16> addrspace(3)* %ptr
+ store <4 x i16> %val, ptr addrspace(3) %ptr
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll
index 8468aa3a7b3ef3..d0d5129f41dd91 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.trig.preop.ll
@@ -9,11 +9,11 @@ declare double @llvm.amdgcn.trig.preop.f64(double, i32) nounwind readnone
 ; SI: v_trig_preop_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], [[SEG]]
 ; SI: buffer_store_dwordx2 [[RESULT]],
 ; SI: s_endpgm
-define amdgpu_kernel void @test_trig_preop_f64(double addrspace(1)* %out, double addrspace(1)* %aptr, i32 addrspace(1)* %bptr) nounwind {
- %a = load double, double addrspace(1)* %aptr, align 8
- %b = load i32, i32 addrspace(1)* %bptr, align 4
+define amdgpu_kernel void @test_trig_preop_f64(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind {
+ %a = load double, ptr addrspace(1) %aptr, align 8
+ %b = load i32, ptr addrspace(1) %bptr, align 4
 %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 %b) nounwind readnone
- store double %result, double addrspace(1)* %out, align 8
+ store double %result, ptr addrspace(1) %out, align 8
 ret void
 }

@@ -22,9 +22,9 @@ define amdgpu_kernel void @test_trig_preop_f64(double addrspace(1)* %out, double
 ; SI: v_trig_preop_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[SRC]], 7
 ; SI: buffer_store_dwordx2 [[RESULT]],
 ; SI: s_endpgm
-define amdgpu_kernel void @test_trig_preop_f64_imm_segment(double addrspace(1)* %out, double addrspace(1)* %aptr) nounwind {
- %a = load double, double addrspace(1)* %aptr, align 8
+define amdgpu_kernel void @test_trig_preop_f64_imm_segment(ptr addrspace(1) %out, ptr addrspace(1) %aptr) nounwind {
+ %a = load double, ptr addrspace(1) %aptr, align 8
 %result = call double @llvm.amdgcn.trig.preop.f64(double %a, i32 7) nounwind readnone
- store double %result, double addrspace(1)* %out, align 8
+ store double %result, ptr addrspace(1) %out, align 8
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
index e0dbfa9c587cda..f74446f72fa667 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=SI %s
 ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=VI %s

-define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_u32_arg_arg_arg(ptr addrspace(1) %out, i32 %src0, i32 %src1, i32 %src2) #0 {
 ; SI-LABEL: bfe_u32_arg_arg_arg:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -29,11 +29,11 @@ define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 %src1)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_u32_arg_arg_imm(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
 ; SI-LABEL: bfe_u32_arg_arg_imm:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -62,11 +62,11 @@ define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 123)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_u32_arg_imm_arg(ptr addrspace(1) %out, i32 %src0, i32 %src2) #0 {
 ; SI-LABEL: bfe_u32_arg_imm_arg:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -95,11 +95,11 @@ define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 123, i32 %src2)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
+define amdgpu_kernel void @bfe_u32_imm_arg_arg(ptr addrspace(1) %out, i32 %src1, i32 %src2) #0 {
 ; SI-LABEL: bfe_u32_imm_arg_arg:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -130,11 +130,11 @@ define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 123, i32 %src1, i32 %src2)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
 ; SI-LABEL: bfe_u32_arg_0_width_reg_offset:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -155,11 +155,11 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 %src1, i32 0)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
+define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(ptr addrspace(1) %out, i32 %src0, i32 %src1) #0 {
 ; SI-LABEL: bfe_u32_arg_0_width_imm_offset:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -180,11 +180,11 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %src0, i32 8, i32 0)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_zextload_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_zextload_i8:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -218,15 +218,15 @@ define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrsp
 ; VI-NEXT: s_waitcnt vmcnt(0)
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %load = load i8, i8 addrspace(1)* %in
+ %load = load i8, ptr addrspace(1) %in
 %ext = zext i8 %load to i32
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

 ; FIXME: Should be using s_add_i32
-define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_zext_in_reg_i8:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -264,15 +264,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 ad
 ; VI-NEXT: v_and_b32_e32 v0, 0xff, v0
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %load = load i32, i32 addrspace(1)* %in, align 4
+ %load = load i32, ptr addrspace(1) %in, align 4
 %add = add i32 %load, 1
 %ext = and i32 %add, 255
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 8)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_zext_in_reg_i16:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -310,15 +310,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 a
 ; VI-NEXT: v_and_b32_e32 v0, 0xffff, v0
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %load = load i32, i32 addrspace(1)* %in, align 4
+ %load = load i32, ptr addrspace(1) %in, align 4
 %add = add i32 %load, 1
 %ext = and i32 %add, 65535
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 0, i32 16)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -358,15 +358,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %ou
 ; VI-NEXT: v_bfe_u32 v0, v0, 1, 8
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %load = load i32, i32 addrspace(1)* %in, align 4
+ %load = load i32, ptr addrspace(1) %in, align 4
 %add = add i32 %load, 1
 %ext = and i32 %add, 255
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 1, i32 8)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -406,15 +406,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %ou
 ; VI-NEXT: v_bfe_u32 v0, v0, 3, 8
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %load = load i32, i32 addrspace(1)* %in, align 4
+ %load = load i32, ptr addrspace(1) %in, align 4
 %add = add i32 %load, 1
 %ext = and i32 %add, 255
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 3, i32 8)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -454,15 +454,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %ou
 ; VI-NEXT: v_bfe_u32 v0, v0, 7, 8
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %load = load i32, i32 addrspace(1)* %in, align 4
+ %load = load i32, ptr addrspace(1) %in, align 4
 %add = add i32 %load, 1
 %ext = and i32 %add, 255
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 7, i32 8)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -500,15 +500,15 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %o
 ; VI-NEXT: v_bfe_u32 v0, v0, 8, 8
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %load = load i32, i32 addrspace(1)* %in, align 4
+ %load = load i32, ptr addrspace(1) %in, align 4
 %add = add i32 %load, 1
 %ext = and i32 %add, 65535
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %ext, i32 8, i32 8)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_1(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_test_1:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -544,13 +544,13 @@ define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(
 ; VI-NEXT: v_and_b32_e32 v0, 1, v0
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %x = load i32, i32 addrspace(1)* %in, align 4
+ %x = load i32, ptr addrspace(1) %in, align 4
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 0, i32 1)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_test_2:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -570,14 +570,14 @@ define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(
 ; VI-NEXT: v_mov_b32_e32 v0, 0
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
- %x = load i32, i32 addrspace(1)* %in, align 4
+ %x = load i32, ptr addrspace(1) %in, align 4
 %shl = shl i32 %x, 31
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 8)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_3(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_test_3:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -597,14 +597,14 @@ define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(
 ; VI-NEXT: v_mov_b32_e32 v0, 0
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
- %x = load i32, i32 addrspace(1)* %in, align 4
+ %x = load i32, ptr addrspace(1) %in, align 4
 %shl = shl i32 %x, 31
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 1)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_test_4:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -624,15 +624,15 @@ define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(
 ; VI-NEXT: v_mov_b32_e32 v0, 0
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
- %x = load i32, i32 addrspace(1)* %in, align 4
+ %x = load i32, ptr addrspace(1) %in, align 4
 %shl = shl i32 %x, 31
 %shr = lshr i32 %shl, 31
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 31, i32 1)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_5(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_test_5:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -668,15 +668,15 @@ define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(
 ; VI-NEXT: v_bfe_i32 v0, v0, 0, 1
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %x = load i32, i32 addrspace(1)* %in, align 4
+ %x = load i32, ptr addrspace(1) %in, align 4
 %shl = shl i32 %x, 31
 %shr = ashr i32 %shl, 31
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shr, i32 0, i32 1)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_6(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_test_6:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -714,14 +714,14 @@ define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(
 ; VI-NEXT: v_and_b32_e32 v0, 2.0, v0
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %x = load i32, i32 addrspace(1)* %in, align 4
+ %x = load i32, ptr addrspace(1) %in, align 4
 %shl = shl i32 %x, 31
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 1, i32 31)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_7(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_test_7:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -757,14 +757,14 @@ define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(
 ; VI-NEXT: v_lshlrev_b32_e32 v0, 31, v0
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %x = load i32, i32 addrspace(1)* %in, align 4
+ %x = load i32, ptr addrspace(1) %in, align 4
 %shl = shl i32 %x, 31
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 0, i32 31)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_8(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_test_8:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -800,14 +800,14 @@ define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(
 ; VI-NEXT: v_and_b32_e32 v0, 1, v0
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %x = load i32, i32 addrspace(1)* %in, align 4
+ %x = load i32, ptr addrspace(1) %in, align 4
 %shl = shl i32 %x, 31
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_9(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_test_9:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -843,13 +843,13 @@ define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(
 ; VI-NEXT: v_lshrrev_b32_e32 v0, 31, v0
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %x = load i32, i32 addrspace(1)* %in, align 4
+ %x = load i32, ptr addrspace(1) %in, align 4
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 31, i32 1)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_10(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_test_10:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -885,13 +885,13 @@ define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace
 ; VI-NEXT: v_lshrrev_b32_e32 v0, 1, v0
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %x = load i32, i32 addrspace(1)* %in, align 4
+ %x = load i32, ptr addrspace(1) %in, align 4
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 1, i32 31)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_11(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_test_11:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -927,13 +927,13 @@ define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace
 ; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v0
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %x = load i32, i32 addrspace(1)* %in, align 4
+ %x = load i32, ptr addrspace(1) %in, align 4
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 8, i32 24)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

-define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_12(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_test_12:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -969,14 +969,14 @@ define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace
 ; VI-NEXT: v_lshrrev_b32_e32 v0, 24, v0
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %x = load i32, i32 addrspace(1)* %in, align 4
+ %x = load i32, ptr addrspace(1) %in, align 4
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %x, i32 24, i32 8)
- store i32 %bfe, i32 addrspace(1)* %out, align 4
+ store i32 %bfe, ptr addrspace(1) %out, align 4
 ret void
 }

 ; V_ASHRREV_U32_e32 {{v[0-9]+}}, 31, {{v[0-9]+}}
-define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_13(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_test_13:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1012,13 +1012,13 @@ define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace
 ; VI-NEXT: v_lshrrev_b32_e32 v0, 31, v0
 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
 ; VI-NEXT: s_endpgm
- %x = load i32, i32 addrspace(1)* %in, align 4
+ %x = load i32, ptr addrspace(1) %in, align 4
 %shl = ashr i32 %x, 31
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
- store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
+ store i32 %bfe, ptr addrspace(1) %out, align 4 ret void
 }

-define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+define amdgpu_kernel void @bfe_u32_test_14(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
 ; SI-LABEL: bfe_u32_test_14:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1038,14 +1038,14 @@ define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace
 ; VI-NEXT: v_mov_b32_e32 v0, 0
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
- %x = load i32, i32 addrspace(1)* %in, align 4
+ %x = load i32, ptr addrspace(1) %in, align 4
 %shl = lshr i32 %x, 31
 %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %shl, i32 31, i32 1)
- store i32 %bfe, i32 addrspace(1)* %out, align 4 ret void
+ store i32 %bfe, ptr addrspace(1) %out, align 4 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_0:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1066,12 +1066,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_1:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1092,12 +1092,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 12334, i32 0, i32 0)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_2:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1118,12 +1118,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 1)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_3:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1144,12 +1144,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 1, i32 0, i32 1)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_4:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1170,12 +1170,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 0, i32 1)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_5:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1196,12 +1196,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 7, i32 1)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_6:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1222,12 +1222,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 128, i32 0, i32 8)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_7:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1248,12 +1248,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 0, i32 8)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_8:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1274,12 +1274,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 127, i32 6, i32 8)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_9:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1300,12 +1300,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65536, i32 16, i32 8)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_10:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1326,12 +1326,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 65535, i32 16, i32 16)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_11:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1352,12 +1352,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 4)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_12:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1378,12 +1378,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 31, i32 1)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_13:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1404,12 +1404,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 131070, i32 16, i32 16)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_14:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1430,12 +1430,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 2, i32 30)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_15:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1456,12 +1456,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 160, i32 4, i32 28)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_16:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1482,12 +1482,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 4294967295, i32 1, i32 7)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_17:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1508,12 +1508,12 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 1, i32 31)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

 ; EG-NOT: BFE
-define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @bfe_u32_constant_fold_test_18(ptr addrspace(1) %out) #0 {
 ; SI-LABEL: bfe_u32_constant_fold_test_18:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
@@ -1534,7 +1534,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out)
 ; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT: s_endpgm
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 255, i32 31, i32 1)
- store i32 %bfe_u32, i32 addrspace(1)* %out, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out, align 4
 ret void
 }

@@ -1542,7 +1542,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out)
 ; reduced to the bits demanded by the bfe.

 ; XXX: The operand to v_bfe_u32 could also just directly be the load register.
-define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
+define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(ptr addrspace(1) %out0,
 ; SI-LABEL: simplify_bfe_u32_multi_use_arg:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
@@ -1587,17 +1587,17 @@ define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out
 ; VI-NEXT: buffer_store_dword v1, off, s[0:3], 0
 ; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0
 ; VI-NEXT: s_endpgm
- i32 addrspace(1)* %out1,
- i32 addrspace(1)* %in) #0 {
- %src = load i32, i32 addrspace(1)* %in, align 4
+ ptr addrspace(1) %out1,
+ ptr addrspace(1) %in) #0 {
+ %src = load i32, ptr addrspace(1) %in, align 4
 %and = and i32 %src, 63
 %bfe_u32 = call i32 @llvm.amdgcn.ubfe.i32(i32 %and, i32 2, i32 2)
- store i32 %bfe_u32, i32 addrspace(1)* %out0, align 4
- store i32 %and, i32 addrspace(1)* %out1, align 4
+ store i32 %bfe_u32, ptr addrspace(1) %out0, align 4
+ store i32 %and, ptr addrspace(1) %out1, align 4
 ret void
 }

-define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
+define amdgpu_kernel void @lshr_and(ptr addrspace(1) %out, i32 %a) #0 {
 ; SI-LABEL: lshr_and:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -1623,11 +1623,11 @@ define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
 ; VI-NEXT: s_endpgm
 %b = lshr i32 %a, 6
 %c = and i32 %b, 7
- store i32 %c, i32 addrspace(1)* %out, align 8
+ store i32 %c, ptr addrspace(1) %out, align 8
 ret void
 }

-define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+define amdgpu_kernel void @v_lshr_and(ptr addrspace(1) %out, i32 %a, i32 %b) #0 {
 ; SI-LABEL: v_lshr_and:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
@@ -1657,11 +1657,11 @@ define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0
 ; VI-NEXT: s_endpgm
 %c = lshr i32 %a, %b
 %d = and i32 %c, 7
- store i32 %d, i32 addrspace(1)* %out, align 8
+ store i32 %d, ptr addrspace(1) %out, align 8
 ret void
 }

-define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
+define amdgpu_kernel void @and_lshr(ptr addrspace(1) %out, i32 %a) #0 {
 ; SI-LABEL: and_lshr:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -1687,11 +1687,11 @@ define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
 ; VI-NEXT: s_endpgm
 %b = and i32 %a, 448
 %c = lshr i32 %b, 6
- store i32 %c, i32 addrspace(1)* %out, align 8
+ store i32 %c, ptr addrspace(1) %out, align 8
 ret void
 }

-define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
+define amdgpu_kernel void @and_lshr2(ptr addrspace(1) %out, i32 %a) #0 {
 ; SI-LABEL: and_lshr2:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -1717,11 +1717,11 @@ define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
 ; VI-NEXT: s_endpgm
 %b = and i32 %a, 511
 %c = lshr i32 %b, 6
- store i32 %c, i32 addrspace(1)* %out, align 8
+ store i32 %c, ptr addrspace(1) %out, align 8
 ret void
 }

-define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
+define amdgpu_kernel void @shl_lshr(ptr addrspace(1) %out, i32 %a) #0 {
 ; SI-LABEL: shl_lshr:
 ; SI: ; %bb.0:
 ; SI-NEXT: s_load_dword s2, s[0:1], 0xb
@@ -1747,7 +1747,7 @@ define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
 ; VI-NEXT: s_endpgm
 %b = shl i32 %a, 9
 %c = lshr i32 %b, 11
- store i32 %c, i32 addrspace(1)* %out, align 8
+ store i32 %c, ptr addrspace(1) %out, align 8
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
index f241a2378102a5..490ce706455cd6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot2.ll
@@ -11,16 +11,16 @@ declare i32 @llvm.amdgcn.workitem.id.x()
 ; GFX9: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 ; GFX10: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 define amdgpu_kernel void @test_llvm_amdgcn_udot2_clamp(
- i32 addrspace(1)* %r,
- <2 x i16> addrspace(1)* %a,
- <2 x i16> addrspace(1)* %b,
- i32 addrspace(1)* %c) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b,
+ ptr addrspace(1) %c) {
 entry:
- %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
- %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b
- %c.val = load i32, i32 addrspace(1)* %c
+ %a.val = load <2 x i16>, ptr addrspace(1) %a
+ %b.val = load <2 x i16>, ptr addrspace(1) %b
+ %c.val = load i32, ptr addrspace(1) %c
 %r.val = call i32 @llvm.amdgcn.udot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val, i1 1)
- store i32 %r.val, i32 addrspace(1)* %r
+ store i32 %r.val, ptr addrspace(1) %r
 ret void
 }

@@ -28,16 +28,16 @@ entry:
 ; GFX9: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
 ; GFX10: v_dot2_u32_u16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
 define amdgpu_kernel void @test_llvm_amdgcn_udot2_no_clamp(
- i32 addrspace(1)* %r,
- <2 x i16> addrspace(1)* %a,
- <2 x i16> addrspace(1)* %b,
- i32 addrspace(1)* %c) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b,
+ ptr addrspace(1) %c) {
 entry:
- %a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
- %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b
- %c.val = load i32, i32 addrspace(1)* %c
+ %a.val = load <2 x i16>, ptr addrspace(1) %a
+ %b.val = load <2 x i16>, ptr addrspace(1) %b
+ %c.val = load i32, ptr addrspace(1) %c
 %r.val = call i32 @llvm.amdgcn.udot2(<2 x i16> %a.val, <2 x i16> %b.val, i32 %c.val, i1 0)
- store i32 %r.val, i32 addrspace(1)* %r
+ store i32 %r.val, ptr addrspace(1) %r
 ret void
 }

@@ -46,18 +46,18 @@ entry:
 ; GFX940: v_dot2_u32_u16 v{{[0-9]+}}, 1, v{{[0-9]+}}, s{{[0-9]+}}{{$}}
 ; GFX10: v_dot2_u32_u16 v{{[0-9]+}}, 1, v{{[0-9]+}}, s{{[0-9]+}} op_sel:[0,1,0] op_sel_hi:[0,0,1]{{$}}
 define amdgpu_kernel void @test_llvm_amdgcn_udot2_op_sel(
- i32 addrspace(1)* %r,
- <2 x i16> addrspace(1)* %b,
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %b,
 i32 %c) {
 entry:
 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %b.gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b, i32 %id
- %b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b.gep
+ %b.gep = getelementptr inbounds <2 x i16>, ptr addrspace(1) %b, i32 %id
+ %b.val = load <2 x i16>, ptr addrspace(1) %b.gep
 %b.elt0 = extractelement <2 x i16> %b.val, i32 0
 %b.elt1 = extractelement <2 x i16> %b.val, i32 1
 %b0 = insertelement <2 x i16> undef, i16 %b.elt1, i32 0
 %b1 = insertelement <2 x i16> %b0, i16 %b.elt0, i32 1
 %r.val = call i32 @llvm.amdgcn.udot2(<2 x i16> <i16 1, i16 1>, <2 x i16> %b1, i32 %c, i1 0)
- store i32 %r.val, i32 addrspace(1)* %r
+ store i32 %r.val, ptr addrspace(1) %r
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
index a2fb4fdb88b5bc..7fc79a1e8f40d5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot4.ll
@@ -10,18 +10,18 @@ declare i32 @llvm.amdgcn.udot4(i32 %a, i32 %b, i32 %c, i1 %clamp)
 ; GFX9: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 ; GFX10: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 define amdgpu_kernel void @test_llvm_amdgcn_udot4_clamp(
- i32 addrspace(1)* %r,
- <4 x i8> addrspace(1)* %a,
- <4 x i8> addrspace(1)* %b,
- i32 addrspace(1)* %c) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b,
+ ptr addrspace(1) %c) {
 entry:
- %a.val = load <4 x i8>, <4 x i8> addrspace(1)* %a
- %b.val = load <4 x i8>, <4 x i8> addrspace(1)* %b
+ %a.val = load <4 x i8>, ptr addrspace(1) %a
+ %b.val = load <4 x i8>, ptr addrspace(1) %b
 %a.val.cast = bitcast <4 x i8> %a.val to i32
 %b.val.cast = bitcast <4 x i8> %b.val to i32
- %c.val = load i32, i32 addrspace(1)* %c
+ %c.val = load i32, ptr addrspace(1) %c
 %r.val = call i32 @llvm.amdgcn.udot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 1)
- store i32 %r.val, i32 addrspace(1)* %r
+ store i32 %r.val, ptr addrspace(1) %r
 ret void
 }

@@ -29,17 +29,17 @@ entry:
 ; GFX9: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
 ; GFX10: v_dot4_u32_u8 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
 define amdgpu_kernel void @test_llvm_amdgcn_udot4_no_clamp(
- i32 addrspace(1)* %r,
- <4 x i8> addrspace(1)* %a,
- <4 x i8> addrspace(1)* %b,
- i32 addrspace(1)* %c) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b,
+ ptr addrspace(1) %c) {
 entry:
- %a.val = load <4 x i8>, <4 x i8> addrspace(1)* %a
- %b.val = load <4 x i8>, <4 x i8> addrspace(1)* %b
+ %a.val = load <4 x i8>, ptr addrspace(1) %a
+ %b.val = load <4 x i8>, ptr addrspace(1) %b
 %a.val.cast = bitcast <4 x i8> %a.val to i32
 %b.val.cast = bitcast <4 x i8> %b.val to i32
- %c.val = load i32, i32 addrspace(1)* %c
+ %c.val = load i32, ptr addrspace(1) %c
 %r.val = call i32 @llvm.amdgcn.udot4(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 0)
- store i32 %r.val, i32 addrspace(1)* %r
+ store i32 %r.val, ptr addrspace(1) %r
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
index 8bc53c952b359b..4aed5b578f4cfc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.udot8.ll
@@ -10,18 +10,18 @@ declare i32 @llvm.amdgcn.udot8(i32 %a, i32 %b, i32 %c, i1 %clamp)
 ; GFX9: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 ; GFX10: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
 define amdgpu_kernel void @test_llvm_amdgcn_udot8_clamp(
- i32 addrspace(1)* %r,
- <8 x i4> addrspace(1)* %a,
- <8 x i4> addrspace(1)* %b,
- i32 addrspace(1)* %c) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b,
+ ptr addrspace(1) %c) {
 entry:
- %a.val = load <8 x i4>, <8 x i4> addrspace(1)* %a
- %b.val = load <8 x i4>, <8 x i4> addrspace(1)* %b
+ %a.val = load <8 x i4>, ptr addrspace(1) %a
+ %b.val = load <8 x i4>, ptr addrspace(1) %b
 %a.val.cast = bitcast <8 x i4> %a.val to i32
 %b.val.cast = bitcast <8 x i4> %b.val to i32
- %c.val = load i32, i32 addrspace(1)* %c
+ %c.val = load i32, ptr addrspace(1) %c
 %r.val = call i32 @llvm.amdgcn.udot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 1)
- store i32 %r.val, i32 addrspace(1)* %r
+ store i32 %r.val, ptr addrspace(1) %r
 ret void
 }

@@ -29,17 +29,17 @@ entry:
 ; GFX9: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}{{$}}
 ; GFX10: v_dot8_u32_u4 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
 define amdgpu_kernel void @test_llvm_amdgcn_udot8_no_clamp(
- i32 addrspace(1)* %r,
- <8 x i4> addrspace(1)* %a,
- <8 x i4> addrspace(1)* %b,
- i32 addrspace(1)* %c) {
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a,
+ ptr addrspace(1) %b,
+ ptr addrspace(1) %c) {
 entry:
- %a.val = load <8 x i4>, <8 x i4> addrspace(1)* %a
- %b.val = load <8 x i4>, <8 x i4> addrspace(1)* %b
+ %a.val = load <8 x i4>, ptr addrspace(1) %a
+ %b.val = load <8 x i4>, ptr addrspace(1) %b
 %a.val.cast = bitcast <8 x i4> %a.val to i32
 %b.val.cast = bitcast <8 x i4> %b.val to i32
- %c.val = load i32, i32 addrspace(1)* %c
+ %c.val = load i32, ptr addrspace(1) %c
 %r.val = call i32 @llvm.amdgcn.udot8(i32 %a.val.cast, i32 %b.val.cast, i32 %c.val, i1 0)
- store i32 %r.val, i32 addrspace(1)* %r
+ store i32 %r.val, ptr addrspace(1) %r
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
index 581b73d105c3c4..8472271a89e892 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.update.dpp.ll
@@ -10,9 +10,9 @@
 ; GFX8-OPT: s_mov
 ; GFX8-NOOPT: s_nop 1
 ; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
-define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
+define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
 %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0) #0
- store i32 %tmp0, i32 addrspace(1)* %out
+ store i32 %tmp0, ptr addrspace(1) %out
 ret void
 }

@@ -23,9 +23,9 @@ define amdgpu_kernel void @dpp_test(i32 addrspace(1)* %out, i32 %in1, i32 %in2)
 ; GFX8-OPT: s_mov
 ; GFX8-NOOPT: s_nop 1
 ; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[2,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1{{$}}
-define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in2) {
+define amdgpu_kernel void @dpp_test_bc(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
 %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 2, i32 1, i32 1, i1 1) #0
- store i32 %tmp0, i32 addrspace(1)* %out
+ store i32 %tmp0, ptr addrspace(1) %out
 ret void
 }

@@ -38,20 +38,20 @@ define amdgpu_kernel void @dpp_test_bc(i32 addrspace(1)* %out, i32 %in1, i32 %in
 ; GFX8: s_nop 1
 ; GFX8-NEXT: v_mov_b32_dpp {{v[0-9]+}}, [[REG]] quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf
 @0 = internal unnamed_addr addrspace(3) global [448 x i32] undef, align 4
-define weak_odr amdgpu_kernel void @dpp_test1(i32* %arg) local_unnamed_addr {
+define weak_odr amdgpu_kernel void @dpp_test1(ptr %arg) local_unnamed_addr {
 bb:
 %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
 %tmp1 = zext i32 %tmp to i64
- %tmp2 = getelementptr inbounds [448 x i32], [448 x i32] addrspace(3)* @0, i32 0, i32 %tmp
- %tmp3 = load i32, i32 addrspace(3)* %tmp2, align 4
+ %tmp2 = getelementptr inbounds [448 x i32], ptr addrspace(3) @0, i32 0, i32 %tmp
+ %tmp3 = load i32, ptr addrspace(3) %tmp2, align 4
 fence syncscope("workgroup-one-as") release
 tail call void @llvm.amdgcn.s.barrier()
 fence syncscope("workgroup-one-as") acquire
 %tmp4 = add nsw i32 %tmp3, %tmp3
 %tmp5 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp4, i32 177, i32 15, i32 15, i1 zeroext false)
 %tmp6 = add nsw i32 %tmp5, %tmp4
- %tmp7 = getelementptr inbounds i32, i32* %arg, i64 %tmp1
- store i32 %tmp6, i32* %tmp7, align 4
+ %tmp7 = getelementptr inbounds i32, ptr %arg, i64 %tmp1
+ store i32 %tmp6, ptr %tmp7, align 4
 ret void
 }

@@ -59,12 +59,12 @@ bb:
 ; GCN: load_{{dwordx2|b64}} v[[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
 ; GCN-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
 ; GCN-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
-define amdgpu_kernel void @update_dpp64_test(i64 addrspace(1)* %arg, i64 %in1, i64 %in2) {
+define amdgpu_kernel void @update_dpp64_test(ptr addrspace(1) %arg, i64 %in1, i64 %in2) {
 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %id
- %load = load i64, i64 addrspace(1)* %gep
+ %gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id
+ %load = load i64, ptr addrspace(1) %gep
 %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 1, i32 1, i32 1, i1 0) #0
- store i64 %tmp0, i64 addrspace(1)* %gep
+ store i64 %tmp0, ptr addrspace(1) %gep
 ret void
 }

@@ -79,12 +79,12 @@ define amdgpu_kernel void @update_dpp64_test(i64 addrspace(1)* %arg, i64 %in1, i
 ; GFX8-OPT-DAG,GFX10-DAG,GFX11-DAG: v_mov_b32_dpp v[[OLD_HI]], v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
 ; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
 ; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
-define amdgpu_kernel void @update_dpp64_imm_old_test(i64 addrspace(1)* %arg, i64 %in2) {
+define amdgpu_kernel void @update_dpp64_imm_old_test(ptr addrspace(1) %arg, i64 %in2) {
 %id = tail call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr inbounds i64, i64 addrspace(1)* %arg, i32 %id
- %load = load i64, i64 addrspace(1)* %gep
+ %gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id
+ %load = load i64, ptr addrspace(1) %gep
 %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 123451234512345, i64 %load, i32 1, i32 1, i32 1, i1 0) #0
- store i64 %tmp0, i64 addrspace(1)* %gep
+ store i64 %tmp0, ptr addrspace(1) %gep
 ret void
 }

@@ -97,9 +97,9 @@ define amdgpu_kernel void @update_dpp64_imm_old_test(i64 addrspace(1)* %arg, i64
 ; GCN-OPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[OLD_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
 ; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
 ; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
-define amdgpu_kernel void @update_dpp64_imm_src_test(i64 addrspace(1)* %out, i64 %in1) {
+define amdgpu_kernel void @update_dpp64_imm_src_test(ptr addrspace(1) %out, i64 %in1) {
 %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 123451234512345, i32 1, i32 1, i32 1, i1 0) #0
- store i64 %tmp0, i64 addrspace(1)* %out
+ store i64 %tmp0, ptr addrspace(1) %out
 ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
index 224a7bb35c275f..1347ef28a710d5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wavefrontsize.ll
@@ -22,16 +22,16 @@
 ; W64: v_mov_b32_e32 [[V:v[0-9]+]], 64
 ; GCN: store_{{dword|b32}} v{{.+}}, [[V]]

-; OPT-W32: store i32 32, i32 addrspace(1)* %arg, align 4
-; OPT-W64: store i32 64, i32 addrspace(1)* %arg, align 4
+; OPT-W32: store i32 32, ptr addrspace(1) %arg, align 4
+; OPT-W64: store i32 64, ptr addrspace(1) %arg, align 4
 ; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
-; OPT-WXX: store i32 %tmp, i32 addrspace(1)* %arg, align 4
+; OPT-WXX: store i32 %tmp, ptr addrspace(1) %arg, align 4
 ; OPT-NEXT: ret void
-define amdgpu_kernel void @fold_wavefrontsize(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @fold_wavefrontsize(ptr addrspace(1) nocapture %arg) {
 bb:
 %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
- store i32 %tmp, i32 addrspace(1)* %arg, align 4
+ store i32 %tmp, ptr addrspace(1) %arg, align 4
 ret void
 }

@@ -43,20 +43,20 @@ bb:
 ; GCN-NOT: cndmask
 ; GCN: store_{{dword|b32}} v{{.+}}, [[V]]

-; OPT-W32: store i32 1, i32 addrspace(1)* %arg, align 4
-; OPT-W64: store i32 2, i32 addrspace(1)* %arg, align 4
+; OPT-W32: store i32 1, ptr addrspace(1) %arg, align 4
+; OPT-W64: store i32 2, ptr addrspace(1) %arg, align 4
 ; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
 ; OPT-WXX: %tmp1 = icmp ugt i32 %tmp, 32
 ; OPT-WXX: %tmp2 = select i1 %tmp1, i32 2, i32 1
-; OPT-WXX: store i32 %tmp2, i32 addrspace(1)* %arg
+; OPT-WXX: store i32 %tmp2, ptr addrspace(1) %arg
 ; OPT-NEXT: ret void
-define amdgpu_kernel void @fold_and_optimize_wavefrontsize(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @fold_and_optimize_wavefrontsize(ptr addrspace(1) nocapture %arg) {
 bb:
 %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
 %tmp1 = icmp ugt i32 %tmp, 32
 %tmp2 = select i1 %tmp1, i32 2, i32 1
- store i32 %tmp2, i32 addrspace(1)* %arg
+ store i32 %tmp2, ptr addrspace(1) %arg
 ret void
 }

@@ -67,17 +67,17 @@ bb:
 ; OPT-WXX: %tmp = tail call i32 @llvm.amdgcn.wavefrontsize()
 ; OPT-WXX: %tmp1 = icmp ugt i32 %tmp, 32
 ; OPT-WXX: bb3:
-; OPT-W64: store i32 1, i32 addrspace(1)* %arg, align 4
+; OPT-W64: store i32 1, ptr addrspace(1) %arg, align 4
 ; OPT-NEXT: ret void
-define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(i32 addrspace(1)* nocapture %arg) {
+define amdgpu_kernel void @fold_and_optimize_if_wavefrontsize(ptr addrspace(1) nocapture %arg) {
 bb:
 %tmp = tail call i32 @llvm.amdgcn.wavefrontsize() #0
 %tmp1 = icmp ugt i32 %tmp, 32
 br i1 %tmp1, label %bb2, label %bb3

 bb2: ; preds = %bb
- store i32 1, i32 addrspace(1)* %arg, align 4
+ store i32 1, ptr addrspace(1) %arg, align 4
 br label %bb3

 bb3: ; preds = %bb2, %bb
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_32.ll
index c63a634c16d0cf..e6af6e854c452d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_32.ll
@@ -10,7 +10,7 @@ declare <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 immarg, <2 x i32>, i1 im

 ; @llvm.amdgcn.wmma.f32.16x16x16.f16

-define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <8 x float> %C, <8 x float> addrspace(1)* %out) {
+define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <8 x float> %C, ptr addrspace(1) %out) {
 ; W32-LABEL: test_wmma_f32_16x16x16_f16:
 ; W32: ; %bb.0: ; %bb
 ; W32-NEXT: v_wmma_f32_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23]
@@ -21,13 +21,13 @@ define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B
 ; W32-NEXT: s_endpgm
 bb:
 %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<16 x half> %A, <16 x half> %B, <8 x float> %C)
- store <8 x float> %res, <8 x float> addrspace(1)* %out, align 32
+ store <8 x float> %res, ptr addrspace(1) %out, align 32
 ret void
 }

 ; @llvm.amdgcn.wmma.f32.16x16x16.bf16

-define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, <8 x float> %C, <8 x float> addrspace(1)* %out) {
+define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, <8 x float> %C, ptr addrspace(1) %out) {
 ; W32-LABEL: test_wmma_f32_16x16x16_bf16:
 ; W32: ; %bb.0: ; %bb
 ; W32-NEXT: v_wmma_f32_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23]
@@ -38,13 +38,13 @@ define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B,
 ; W32-NEXT: s_endpgm
 bb:
 %res = call <8 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <8 x float> %C)
- store <8 x float> %res, <8 x float> addrspace(1)* %out, align 32
+ store <8 x float> %res, ptr addrspace(1) %out, align 32
 ret void
 }

 ; @llvm.amdgcn.wmma.f16.16x16x16.f16

-define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> %B, <16 x half> %C, <16 x half> addrspace(1)* %out) {
+define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> %B, <16 x half> %C, ptr addrspace(1) %out) {
 ; W32-LABEL: test_wmma_f16_16x16x16_f16_lo:
 ; W32: ; %bb.0: ; %bb
 ; W32-NEXT: v_wmma_f16_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23]
@@ -55,11 +55,11 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half>
 ; W32-NEXT: s_endpgm
 bb:
 %res = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A, <16 x half> %B, <16 x half> %C, i1 0)
- store <16 x half> %res, <16 x half> addrspace(1)* %out, align 32
+ store <16 x half> %res, ptr addrspace(1) %out, align 32
 ret void
 }

-define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> %B, <16 x half> %C, <16 x half> addrspace(1)* %out) {
+define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> %B, <16 x half> %C, ptr addrspace(1) %out) {
 ; W32-LABEL: test_wmma_f16_16x16x16_f16_hi:
 ; W32: ; %bb.0: ; %bb
 ; W32-NEXT: v_wmma_f16_16x16x16_f16 v[16:23], v[0:7], v[8:15], v[16:23] op_sel:[0,0,1]
@@ -70,13 +70,13 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half>
 ; W32-NEXT: s_endpgm
 bb:
 %res = call <16 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A, <16 x half> %B, <16 x half> %C, i1 1)
- store <16 x half> %res, <16 x half> addrspace(1)* %out, align 32
+ store <16 x half> %res, ptr addrspace(1) %out, align 32
 ret void
 }

 ; @llvm.amdgcn.wmma.bf16.16x16x16.bf16

-define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, <16 x i16> addrspace(1)* %out) {
+define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, ptr addrspace(1) %out) {
 ; W32-LABEL: test_wmma_bf16_16x16x16_bf16_lo:
 ; W32: ; %bb.0: ; %bb
 ; W32-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23]
@@ -87,11 +87,11 @@ define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16>
 ; W32-NEXT: s_endpgm
 bb:
 %res = call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, i1 0)
- store <16 x i16> %res, <16 x i16> addrspace(1)* %out, align 32
+ store <16 x i16> %res, ptr addrspace(1) %out, align 32
 ret void
 }

-define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, <16 x i16> addrspace(1)* %out) {
+define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, ptr addrspace(1) %out) {
 ; W32-LABEL: test_wmma_bf16_16x16x16_bf16_hi:
 ; W32: ; %bb.0: ; %bb
 ; W32-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:23], v[0:7], v[8:15], v[16:23] op_sel:[0,0,1]
@@ -102,13 +102,13 @@ define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16>
 ; W32-NEXT: s_endpgm
 bb:
 %res = call <16 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <16 x i16> %C, i1 1)
- store <16 x i16> %res, <16 x i16> addrspace(1)* %out, align 32
+ store <16 x i16> %res, ptr addrspace(1) %out, align 32
 ret void
 }

 ; @llvm.amdgcn.wmma.i32.16x16x16.iu8

-define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) {
+define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
 ; W32-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_unsigned:
 ; W32: ; %bb.0: ; %bb
 ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15]
@@ -119,11 +119,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A
 ; W32-NEXT: s_endpgm
 bb:
 %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i1 0)
- store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32
+ store <8 x i32> %res, ptr addrspace(1) %out, align 32
 ret void
 }

-define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) {
+define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
 ; W32-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_signed:
 ; W32: ; %bb.0: ; %bb
 ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0]
@@ -134,11 +134,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A,
 ; W32-NEXT: s_endpgm
 bb:
 %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i1 0)
- store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32
+ store <8 x i32> %res, ptr addrspace(1) %out, align 32
 ret void
 }

-define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) {
+define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
 ; W32-LABEL: test_wmma_i32_16x16x16_ui8_signed_unsigned:
 ; W32: ; %bb.0: ; %bb
 ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,0,0]
@@ -149,11 +149,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A,
 ; W32-NEXT: s_endpgm
 bb:
 %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i1 0)
- store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32
+ store <8 x i32> %res, ptr addrspace(1) %out, align 32
 ret void
 }

-define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) {
+define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
 ; W32-LABEL: test_wmma_i32_16x16x16_ui8_signed_signed:
 ; W32: ; %bb.0: ; %bb
 ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,1,0]
@@ -164,11 +164,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4
 ; W32-NEXT: s_endpgm
 bb:
 %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i1 0)
- store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32
+ store <8 x i32> %res, ptr addrspace(1) %out, align 32
 ret void
 }

-define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) {
+define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
 ; W32-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp:
 ; W32: ; %bb.0: ; %bb
 ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] clamp
@@ -179,11 +179,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i
 ; W32-NEXT: s_endpgm
 bb:
 %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i1 1)
- store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32
+ store <8 x i32> %res, ptr addrspace(1) %out, align 32
 ret void
 }

-define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) {
+define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
 ; W32-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp:
 ; W32: ; %bb.0: ; %bb
 ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[0,1,0] clamp
@@ -194,11 +194,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32
 ; W32-NEXT: s_endpgm
 bb:
 %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i1 1)
- store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32
+ store <8 x i32> %res, ptr addrspace(1) %out, align 32
 ret void
 }

-define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) {
+define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) {
 ; W32-LABEL: test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp:
 ; W32: ; %bb.0: ; %bb
 ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7],
v[8:15] neg_lo:[1,0,0] clamp @@ -209,11 +209,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 0, <4 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> %A, <4 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui8_signed_signed_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu8 v[8:15], v[0:3], v[4:7], v[8:15] neg_lo:[1,1,0] clamp @@ -224,13 +224,13 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 1, <4 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } ; @llvm.amdgcn.wmma.i32.16x16x16.iu4 -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_unsigned: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] @@ -241,11 +241,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_signed: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] @@ -256,11 +256,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_signed_unsigned: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] @@ -271,11 +271,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x 
i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_signed_signed: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,1,0] @@ -286,12 +286,12 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 0) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] clamp @@ -302,11 +302,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[0,1,0] clamp @@ -317,11 +317,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,0,0] clamp @@ -332,11 +332,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32 ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> %A, <2 x i32> %B, 
<8 x i32> %C, <8 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> %A, <2 x i32> %B, <8 x i32> %C, ptr addrspace(1) %out) { ; W32-LABEL: test_wmma_i32_16x16x16_ui4_signed_signed_clamp: ; W32: ; %bb.0: ; %bb ; W32-NEXT: v_wmma_i32_16x16x16_iu4 v[4:11], v[0:1], v[2:3], v[4:11] neg_lo:[1,1,0] clamp @@ -347,7 +347,7 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> ; W32-NEXT: s_endpgm bb: %res = call <8 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 1, <2 x i32> %B, <8 x i32> %C, i1 1) - store <8 x i32> %res, <8 x i32> addrspace(1)* %out, align 32 + store <8 x i32> %res, ptr addrspace(1) %out, align 32 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_64.ll index 6db02059d44fb8..dd6b1a143630a6 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.wmma_64.ll @@ -10,7 +10,7 @@ declare <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 immarg, <2 x i32>, i1 im ; @llvm.amdgcn.wmma.f32.16x16x16.f16 -define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <4 x float> %C, <4 x float> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B, <4 x float> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_f32_16x16x16_f16: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_f32_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] @@ -19,13 +19,13 @@ define amdgpu_ps void @test_wmma_f32_16x16x16_f16(<16 x half> %A, <16 x half> %B ; W64-NEXT: s_endpgm bb: %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.f16(<16 x half> %A, <16 x half> %B, <4 x float> %C) - store <4 x float> %res, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %res, ptr addrspace(1) %out, align 16 ret void } ; @llvm.amdgcn.wmma.f32.16x16x16.bf16 -define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, <4 x float> %C, <4 x float> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, <4 x float> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_f32_16x16x16_bf16: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_f32_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] @@ -34,13 +34,13 @@ define amdgpu_ps void @test_wmma_f32_16x16x16_bf16(<16 x i16> %A, <16 x i16> %B, ; W64-NEXT: s_endpgm bb: %res = call <4 x float> @llvm.amdgcn.wmma.f32.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <4 x float> %C) - store <4 x float> %res, <4 x float> addrspace(1)* %out, align 16 + store <4 x float> %res, ptr addrspace(1) %out, align 16 ret void } ; @llvm.amdgcn.wmma.f16.16x16x16.f16 -define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> %B, <8 x half> %C, <8 x half> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> %B, <8 x half> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_f16_16x16x16_f16_lo: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] @@ -49,11 +49,11 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_lo(<16 x half> %A, <16 x half> ; W64-NEXT: s_endpgm bb: %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A, <16 x half> %B, <8 x half> %C, i1 0) - store <8 x half> %res, <8 x half> addrspace(1)* %out, align 16 + store <8 x half> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x 
half> %A, <16 x half> %B, <8 x half> %C, <8 x half> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> %B, <8 x half> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_f16_16x16x16_f16_hi: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_f16_16x16x16_f16 v[16:19], v[0:7], v[8:15], v[16:19] op_sel:[0,0,1] @@ -62,13 +62,13 @@ define amdgpu_ps void @test_wmma_f16_16x16x16_f16_hi(<16 x half> %A, <16 x half> ; W64-NEXT: s_endpgm bb: %res = call <8 x half> @llvm.amdgcn.wmma.f16.16x16x16.f16(<16 x half> %A, <16 x half> %B, <8 x half> %C, i1 1) - store <8 x half> %res, <8 x half> addrspace(1)* %out, align 16 + store <8 x half> %res, ptr addrspace(1) %out, align 16 ret void } ; @llvm.amdgcn.wmma.bf16.16x16x16.bf16 -define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, <8 x i16> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_bf16_16x16x16_bf16_lo: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] @@ -77,11 +77,11 @@ define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_lo(<16 x i16> %A, <16 x i16> ; W64-NEXT: s_endpgm bb: %res = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, i1 0) - store <8 x i16> %res, <8 x i16> addrspace(1)* %out, align 16 + store <8 x i16> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, <8 x i16> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_bf16_16x16x16_bf16_hi: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_bf16_16x16x16_bf16 v[16:19], v[0:7], v[8:15], v[16:19] op_sel:[0,0,1] @@ -90,13 +90,13 @@ define amdgpu_ps void @test_wmma_bf16_16x16x16_bf16_hi(<16 x i16> %A, <16 x i16> ; W64-NEXT: s_endpgm bb: %res = call <8 x i16> @llvm.amdgcn.wmma.bf16.16x16x16.bf16(<16 x i16> %A, <16 x i16> %B, <8 x i16> %C, i1 1) - store <8 x i16> %res, <8 x i16> addrspace(1)* %out, align 16 + store <8 x i16> %res, ptr addrspace(1) %out, align 16 ret void } ; @llvm.amdgcn.wmma.i32.16x16x16.iu8 -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_unsigned: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] @@ -105,12 +105,12 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned(<4 x i32> %A ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 0, <4 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_signed: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], 
v[4:7], v[8:11] neg_lo:[0,1,0] @@ -119,11 +119,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed(<4 x i32> %A, ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 1, <4 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_signed_unsigned: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] @@ -132,11 +132,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned(<4 x i32> %A, ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 0, <4 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_signed_signed: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,1,0] @@ -145,11 +145,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed(<4 x i32> %A, <4 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 1, <4 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] clamp @@ -158,11 +158,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_unsigned_clamp(<4 x i ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 0, <4 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[0,1,0] clamp @@ -171,11 +171,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_unsigned_signed_clamp(<4 x i32 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 0, <4 x i32> %A, i1 1, <4 x i32> %B, <4 x i32> %C, 
i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,0,0] clamp @@ -184,11 +184,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_unsigned_clamp(<4 x i32 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 0, <4 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui8_signed_signed_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu8 v[8:11], v[0:3], v[4:7], v[8:11] neg_lo:[1,1,0] clamp @@ -197,13 +197,13 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui8_signed_signed_clamp(<4 x i32> ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu8(i1 1, <4 x i32> %A, i1 1, <4 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } ; @llvm.amdgcn.wmma.i32.16x16x16.iu4 -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_unsigned: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] @@ -212,11 +212,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned(<2 x i32> %A ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_signed: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] @@ -225,11 +225,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed(<2 x i32> %A, ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, <2 x i32> %B, <4 x 
i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_signed_unsigned: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] @@ -238,11 +238,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned(<2 x i32> %A, ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_signed_signed: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,1,0] @@ -251,11 +251,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed(<2 x i32> %A, <2 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 1, <2 x i32> %B, <4 x i32> %C, i1 0) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] clamp @@ -264,11 +264,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_unsigned_clamp(<2 x i ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 0, <2 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[0,1,0] clamp @@ -277,11 +277,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_unsigned_signed_clamp(<2 x i32 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 0, <2 x i32> %A, i1 1, <2 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp: ; W64: ; %bb.0: ; %bb ; 
W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,0,0] clamp @@ -290,11 +290,11 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_unsigned_clamp(<2 x i32 ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 0, <2 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } -define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, <4 x i32> addrspace(1)* %out) { +define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> %A, <2 x i32> %B, <4 x i32> %C, ptr addrspace(1) %out) { ; W64-LABEL: test_wmma_i32_16x16x16_ui4_signed_signed_clamp: ; W64: ; %bb.0: ; %bb ; W64-NEXT: v_wmma_i32_16x16x16_iu4 v[4:7], v[0:1], v[2:3], v[4:7] neg_lo:[1,1,0] clamp @@ -303,7 +303,7 @@ define amdgpu_ps void @test_wmma_i32_16x16x16_ui4_signed_signed_clamp(<2 x i32> ; W64-NEXT: s_endpgm bb: %res = call <4 x i32> @llvm.amdgcn.wmma.i32.16x16x16.iu4(i1 1, <2 x i32> %A, i1 1, <2 x i32> %B, <4 x i32> %C, i1 1) - store <4 x i32> %res, <4 x i32> addrspace(1)* %out, align 16 + store <4 x i32> %res, ptr addrspace(1) %out, align 16 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll index 6d7ce5589f96f1..3eec083cd73074 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workgroup.id.ll @@ -34,9 +34,9 @@ declare i32 @llvm.amdgcn.workgroup.id.z() #0 ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define amdgpu_kernel void @test_workgroup_id_x(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workgroup_id_x(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workgroup.id.x() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } @@ -61,9 +61,9 @@ define amdgpu_kernel void @test_workgroup_id_x(i32 addrspace(1)* %out) #1 { ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define amdgpu_kernel void @test_workgroup_id_y(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workgroup_id_y(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workgroup.id.y() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } @@ -96,9 +96,9 @@ define amdgpu_kernel void @test_workgroup_id_y(i32 addrspace(1)* %out) #1 { ; ALL: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0 ; ALL: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1 ; ALL: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0 -define amdgpu_kernel void @test_workgroup_id_z(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workgroup_id_z(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workgroup.id.z() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll index f41a184f3179c5..0d6c5cba6fed51 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll @@ -22,9 +22,9 @@ declare i32 @llvm.amdgcn.workitem.id.z() #0 ; ALL: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}v0 ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 0 -define amdgpu_kernel void @test_workitem_id_x(i32 addrspace(1)* 
%out) #1 { +define amdgpu_kernel void @test_workitem_id_x(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.x() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } @@ -40,9 +40,9 @@ define amdgpu_kernel void @test_workitem_id_x(i32 addrspace(1)* %out) #1 { ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10 ; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]] ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 1 -define amdgpu_kernel void @test_workitem_id_y(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workitem_id_y(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.y() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } @@ -58,9 +58,9 @@ define amdgpu_kernel void @test_workitem_id_y(i32 addrspace(1)* %out) #1 { ; PACKED-TID: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10 ; PACKED-TID: {{buffer|flat|global}}_store_{{dword|b32}} {{.*}}[[ID]] ; PACKED-TID: .amdhsa_system_vgpr_workitem_id 2 -define amdgpu_kernel void @test_workitem_id_z(i32 addrspace(1)* %out) #1 { +define amdgpu_kernel void @test_workitem_id_z(ptr addrspace(1) %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.z() - store i32 %id, i32 addrspace(1)* %out + store i32 %id, ptr addrspace(1) %out ret void } @@ -76,13 +76,13 @@ define amdgpu_kernel void @test_workitem_id_z(i32 addrspace(1)* %out) #1 { ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] -define amdgpu_kernel void @test_reqd_workgroup_size_x_only(i32* %out) !reqd_work_group_size !0 { +define amdgpu_kernel void @test_reqd_workgroup_size_x_only(ptr %out) !reqd_work_group_size !0 { %id.x = call i32 @llvm.amdgcn.workitem.id.x() %id.y = call i32 @llvm.amdgcn.workitem.id.y() %id.z = call i32 @llvm.amdgcn.workitem.id.z() - store volatile i32 %id.x, i32* %out - store volatile i32 %id.y, i32* %out - store volatile i32 %id.z, i32* %out + store volatile i32 %id.x, ptr %out + store volatile i32 %id.y, ptr %out + store volatile i32 %id.z, ptr %out ret void } @@ -98,13 +98,13 @@ define amdgpu_kernel void @test_reqd_workgroup_size_x_only(i32* %out) !reqd_work ; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] ; ALL: flat_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, [[ZERO]] -define amdgpu_kernel void @test_reqd_workgroup_size_y_only(i32* %out) !reqd_work_group_size !1 { +define amdgpu_kernel void @test_reqd_workgroup_size_y_only(ptr %out) !reqd_work_group_size !1 { %id.x = call i32 @llvm.amdgcn.workitem.id.x() %id.y = call i32 @llvm.amdgcn.workitem.id.y() %id.z = call i32 @llvm.amdgcn.workitem.id.z() - store volatile i32 %id.x, i32* %out - store volatile i32 %id.y, i32* %out - store volatile i32 %id.z, i32* %out + store volatile i32 %id.x, ptr %out + store volatile i32 %id.y, ptr %out + store volatile i32 %id.z, ptr %out ret void } @@ -119,13 +119,13 @@ define amdgpu_kernel void @test_reqd_workgroup_size_y_only(i32* %out) !reqd_work ; PACKED: v_bfe_u32 [[MASKED:v[0-9]+]], v0, 10, 20 ; PACKED: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[MASKED]] -define amdgpu_kernel void @test_reqd_workgroup_size_z_only(i32* %out) !reqd_work_group_size !2 { +define amdgpu_kernel void @test_reqd_workgroup_size_z_only(ptr %out) !reqd_work_group_size !2 { %id.x = call i32 @llvm.amdgcn.workitem.id.x() %id.y = call i32 @llvm.amdgcn.workitem.id.y() %id.z = call i32 @llvm.amdgcn.workitem.id.z() - store volatile i32 %id.x, i32* %out - store volatile i32 %id.y, i32* %out - store volatile i32 
%id.z, i32* %out + store volatile i32 %id.x, ptr %out + store volatile i32 %id.y, ptr %out + store volatile i32 %id.z, ptr %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll index bf75801a21d04c..37951669dbe755 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.writelane.ll @@ -8,33 +8,33 @@ declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #0 ; CHECK-LABEL: {{^}}test_writelane_sreg: ; CIGFX9: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, m0 ; GFX10: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} -define amdgpu_kernel void @test_writelane_sreg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 { - %oldval = load i32, i32 addrspace(1)* %out +define amdgpu_kernel void @test_writelane_sreg(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 { + %oldval = load i32, ptr addrspace(1) %out %writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 %oldval) - store i32 %writelane, i32 addrspace(1)* %out, align 4 + store i32 %writelane, ptr addrspace(1) %out, align 4 ret void } ; CHECK-LABEL: {{^}}test_writelane_imm_sreg: ; CHECK: v_writelane_b32 v{{[0-9]+}}, 32, s{{[0-9]+}} -define amdgpu_kernel void @test_writelane_imm_sreg(i32 addrspace(1)* %out, i32 %src1) #1 { - %oldval = load i32, i32 addrspace(1)* %out +define amdgpu_kernel void @test_writelane_imm_sreg(ptr addrspace(1) %out, i32 %src1) #1 { + %oldval = load i32, ptr addrspace(1) %out %writelane = call i32 @llvm.amdgcn.writelane(i32 32, i32 %src1, i32 %oldval) - store i32 %writelane, i32 addrspace(1)* %out, align 4 + store i32 %writelane, ptr addrspace(1) %out, align 4 ret void } ; CHECK-LABEL: {{^}}test_writelane_vreg_lane: ; CHECK: v_readfirstlane_b32 [[LANE:s[0-9]+]], v{{[0-9]+}} ; CHECK: v_writelane_b32 v{{[0-9]+}}, 12, [[LANE]] -define amdgpu_kernel void @test_writelane_vreg_lane(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #1 { +define amdgpu_kernel void @test_writelane_vreg_lane(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep.in = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid - %args = load <2 x i32>, <2 x i32> addrspace(1)* %gep.in - %oldval = load i32, i32 addrspace(1)* %out + %gep.in = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 %tid + %args = load <2 x i32>, ptr addrspace(1) %gep.in + %oldval = load i32, ptr addrspace(1) %out %lane = extractelement <2 x i32> %args, i32 1 %writelane = call i32 @llvm.amdgcn.writelane(i32 12, i32 %lane, i32 %oldval) - store i32 %writelane, i32 addrspace(1)* %out, align 4 + store i32 %writelane, ptr addrspace(1) %out, align 4 ret void } @@ -43,20 +43,20 @@ define amdgpu_kernel void @test_writelane_vreg_lane(i32 addrspace(1)* %out, <2 x ; CIGFX9: s_mov_b32 [[COPY_M0:s[0-9]+]], m0 ; CIGFX9: v_writelane_b32 v{{[0-9]+}}, [[COPY_M0]], m0 ; GFX10: v_writelane_b32 v{{[0-9]+}}, m0, s{{[0-9]+}} -define amdgpu_kernel void @test_writelane_m0_sreg(i32 addrspace(1)* %out, i32 %src1) #1 { - %oldval = load i32, i32 addrspace(1)* %out +define amdgpu_kernel void @test_writelane_m0_sreg(ptr addrspace(1) %out, i32 %src1) #1 { + %oldval = load i32, ptr addrspace(1) %out %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"() %writelane = call i32 @llvm.amdgcn.writelane(i32 %m0, i32 %src1, i32 %oldval) - store i32 %writelane, i32 addrspace(1)* %out, align 4 + store i32 %writelane, ptr addrspace(1) %out, align 4 ret void } ; CHECK-LABEL: {{^}}test_writelane_imm: ; CHECK: v_writelane_b32 v{{[0-9]+}}, 
s{{[0-9]+}}, 32 -define amdgpu_kernel void @test_writelane_imm(i32 addrspace(1)* %out, i32 %src0) #1 { - %oldval = load i32, i32 addrspace(1)* %out +define amdgpu_kernel void @test_writelane_imm(ptr addrspace(1) %out, i32 %src0) #1 { + %oldval = load i32, ptr addrspace(1) %out %writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 32, i32 %oldval) #0 - store i32 %writelane, i32 addrspace(1)* %out, align 4 + store i32 %writelane, ptr addrspace(1) %out, align 4 ret void } @@ -64,9 +64,9 @@ define amdgpu_kernel void @test_writelane_imm(i32 addrspace(1)* %out, i32 %src0) ; CHECK: v_mov_b32_e32 [[OLDVAL:v[0-9]+]], s{{[0-9]+}} ; CIGFX9: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, m0 ; GFX10: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, s{{[0-9]+}} -define amdgpu_kernel void @test_writelane_sreg_oldval(i32 inreg %oldval, i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 { +define amdgpu_kernel void @test_writelane_sreg_oldval(i32 inreg %oldval, ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 { %writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 %oldval) - store i32 %writelane, i32 addrspace(1)* %out, align 4 + store i32 %writelane, ptr addrspace(1) %out, align 4 ret void } @@ -74,9 +74,9 @@ define amdgpu_kernel void @test_writelane_sreg_oldval(i32 inreg %oldval, i32 add ; CHECK: v_mov_b32_e32 [[OLDVAL:v[0-9]+]], 42 ; CIGFX9: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, m0 ; GFX10: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, s{{[0-9]+}} -define amdgpu_kernel void @test_writelane_imm_oldval(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 { +define amdgpu_kernel void @test_writelane_imm_oldval(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 { %writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 42) - store i32 %writelane, i32 addrspace(1)* %out, align 4 + store i32 %writelane, ptr addrspace(1) %out, align 4 ret void }