24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.gfx11.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN-DAG: s_mov_b32 m0,
; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
store i32 %val, i32 addrspace(1)* %out
define amdgpu_kernel void @ds_ordered_add(ptr addrspace(2) inreg %gds, ptr addrspace(1) %out) {
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
store i32 %val, ptr addrspace(1) %out
ret void
}

Expand All @@ -16,8 +16,8 @@ define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addr
; GCN: s_mov_b32 m0, s0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
define amdgpu_cs float @ds_ordered_add_cs(ptr addrspace(2) inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}
Expand All @@ -27,8 +27,8 @@ define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
; GCN: s_mov_b32 m0, s0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
define amdgpu_ps float @ds_ordered_add_ps(ptr addrspace(2) inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}
Expand All @@ -38,8 +38,8 @@ define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
; GCN: s_mov_b32 m0, s0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
define amdgpu_vs float @ds_ordered_add_vs(ptr addrspace(2) inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}
Expand All @@ -49,10 +49,10 @@ define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
; GCN: s_mov_b32 m0, s0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_gs float @ds_ordered_add_gs(i32 addrspace(2)* inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
define amdgpu_gs float @ds_ordered_add_gs(ptr addrspace(2) inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 16777217, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}

declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
declare i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) nocapture, i32, i32, i32, i1, i32, i1, i1)
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN-DAG: s_mov_b32 m0,
; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
store i32 %val, i32 addrspace(1)* %out
define amdgpu_kernel void @ds_ordered_add(ptr addrspace(2) inreg %gds, ptr addrspace(1) %out) {
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
store i32 %val, ptr addrspace(1) %out
ret void
}

Expand All @@ -23,29 +23,29 @@ define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addr
; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN-DAG: s_mov_b32 m0,
; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:776 gds
define amdgpu_kernel void @ds_ordered_add_counter2(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 2, i1 true, i1 true)
store i32 %val, i32 addrspace(1)* %out
define amdgpu_kernel void @ds_ordered_add_counter2(ptr addrspace(2) inreg %gds, ptr addrspace(1) %out) {
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 2, i1 true, i1 true)
store i32 %val, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}ds_ordered_add_nodone:
; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN-DAG: s_mov_b32 m0,
; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:260 gds
define amdgpu_kernel void @ds_ordered_add_nodone(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 false)
store i32 %val, i32 addrspace(1)* %out
define amdgpu_kernel void @ds_ordered_add_nodone(ptr addrspace(2) inreg %gds, ptr addrspace(1) %out) {
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 false)
store i32 %val, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}ds_ordered_add_norelease:
; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
; GCN-DAG: s_mov_b32 m0,
; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:4 gds
define amdgpu_kernel void @ds_ordered_add_norelease(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 false, i1 false)
store i32 %val, i32 addrspace(1)* %out
define amdgpu_kernel void @ds_ordered_add_norelease(ptr addrspace(2) inreg %gds, ptr addrspace(1) %out) {
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 false, i1 false)
store i32 %val, ptr addrspace(1) %out
ret void
}

Expand All @@ -55,8 +55,8 @@ define amdgpu_kernel void @ds_ordered_add_norelease(i32 addrspace(2)* inreg %gds
; VIGFX9-NEXT: s_nop 0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
define amdgpu_cs float @ds_ordered_add_cs(ptr addrspace(2) inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}
Expand All @@ -68,7 +68,7 @@ define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define float @ds_ordered_add_default_cc() {
%val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%val = call i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}
Expand All @@ -80,7 +80,7 @@ define float @ds_ordered_add_default_cc() {
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define fastcc float @ds_ordered_add_fastcc() {
%val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%val = call i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}
Expand All @@ -92,7 +92,7 @@ define fastcc float @ds_ordered_add_fastcc() {
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define float @ds_ordered_add_func() {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}
Expand All @@ -103,8 +103,8 @@ define float @ds_ordered_add_func() {
; VIGFX9-NEXT: s_nop 0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:1796 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
define amdgpu_ps float @ds_ordered_add_ps(ptr addrspace(2) inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}
Expand All @@ -115,8 +115,8 @@ define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
; VIGFX9-NEXT: s_nop 0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:2820 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
define amdgpu_vs float @ds_ordered_add_vs(ptr addrspace(2) inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}
Expand All @@ -127,10 +127,10 @@ define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
; VIGFX9-NEXT: s_nop 0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:3844 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_gs float @ds_ordered_add_gs(i32 addrspace(2)* inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
define amdgpu_gs float @ds_ordered_add_gs(ptr addrspace(2) inreg %gds) {
%val = call i32@llvm.amdgcn.ds.ordered.add(ptr addrspace(2) %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}

declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
declare i32 @llvm.amdgcn.ds.ordered.add(ptr addrspace(2) nocapture, i32, i32, i32, i1, i32, i1, i1)
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
; VIGFX9-NEXT: s_nop 0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds
; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
define amdgpu_cs float @ds_ordered_swap(i32 addrspace(2)* inreg %gds, i32 %value) {
%val = call i32@llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
define amdgpu_cs float @ds_ordered_swap(ptr addrspace(2) inreg %gds, i32 %value) {
%val = call i32@llvm.amdgcn.ds.ordered.swap(ptr addrspace(2) %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%r = bitcast i32 %val to float
ret float %r
}
Expand All @@ -31,13 +31,13 @@ define amdgpu_cs float @ds_ordered_swap(i32 addrspace(2)* inreg %gds, i32 %value
; GCN-NEXT: s_waitcnt expcnt(0)
; GCN-NEXT: s_or_b64 exec, exec, s[[SAVED]]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
define amdgpu_cs float @ds_ordered_swap_conditional(i32 addrspace(2)* inreg %gds, i32 %value) {
define amdgpu_cs float @ds_ordered_swap_conditional(ptr addrspace(2) inreg %gds, i32 %value) {
entry:
%c = icmp ne i32 %value, 0
br i1 %c, label %if-true, label %endif

if-true:
%val = call i32@llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
%val = call i32@llvm.amdgcn.ds.ordered.swap(ptr addrspace(2) %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
br label %endif

endif:
Expand All @@ -46,4 +46,4 @@ endif:
ret float %r
}

declare i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
declare i32 @llvm.amdgcn.ds.ordered.swap(ptr addrspace(2) nocapture, i32, i32, i32, i1, i32, i1, i1)
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.permute.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,18 @@ declare i32 @llvm.amdgcn.ds.permute(i32, i32) #0

; CHECK-LABEL: {{^}}ds_permute:
; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @ds_permute(i32 addrspace(1)* %out, i32 %index, i32 %src) nounwind {
define amdgpu_kernel void @ds_permute(ptr addrspace(1) %out, i32 %index, i32 %src) nounwind {
%bpermute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) #0
store i32 %bpermute, i32 addrspace(1)* %out, align 4
store i32 %bpermute, ptr addrspace(1) %out, align 4
ret void
}

; CHECK-LABEL: {{^}}ds_permute_imm_offset:
; CHECK: ds_permute_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset:4
define amdgpu_kernel void @ds_permute_imm_offset(i32 addrspace(1)* %out, i32 %base_index, i32 %src) nounwind {
define amdgpu_kernel void @ds_permute_imm_offset(ptr addrspace(1) %out, i32 %base_index, i32 %src) nounwind {
%index = add i32 %base_index, 4
%bpermute = call i32 @llvm.amdgcn.ds.permute(i32 %index, i32 %src) #0
store i32 %bpermute, i32 addrspace(1)* %out, align 4
store i32 %bpermute, ptr addrspace(1) %out, align 4
ret void
}

Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.sub.gs.reg.rtn.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ define amdgpu_gs void @test_sub_32(i32 %arg) {
ret void
}

define amdgpu_gs void @test_sub_32_use(i32 %arg, i32 addrspace(1)* %out) {
define amdgpu_gs void @test_sub_32_use(i32 %arg, ptr addrspace(1) %out) {
; CHECK-LABEL: test_sub_32_use:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
Expand All @@ -34,7 +34,7 @@ define amdgpu_gs void @test_sub_32_use(i32 %arg, i32 addrspace(1)* %out) {
; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; CHECK-NEXT: s_endpgm
%res = call i32 @llvm.amdgcn.ds.sub.gs.reg.rtn.i32(i32 %arg, i32 16)
store i32 %res, i32 addrspace(1)* %out, align 4
store i32 %res, ptr addrspace(1) %out, align 4
ret void
}

Expand All @@ -53,7 +53,7 @@ define amdgpu_gs void @test_sub_64(i32 %arg) {
ret void
}

define amdgpu_gs void @test_sub_64_use(i32 %arg, i64 addrspace(1)* %out) {
define amdgpu_gs void @test_sub_64_use(i32 %arg, ptr addrspace(1) %out) {
; CHECK-LABEL: test_sub_64_use:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
Expand All @@ -67,7 +67,7 @@ define amdgpu_gs void @test_sub_64_use(i32 %arg, i64 addrspace(1)* %out) {
; CHECK-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; CHECK-NEXT: s_endpgm
%res = call i64 @llvm.amdgcn.ds.sub.gs.reg.rtn.i64(i32 %arg, i32 32)
store i64 %res, i64 addrspace(1)* %out, align 4
store i64 %res, ptr addrspace(1) %out, align 4
ret void
}

4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #0

; CHECK-LABEL: {{^}}ds_swizzle:
; CHECK: ds_swizzle_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:swizzle(BITMASK_PERM,"00p11")
define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) nounwind {
define amdgpu_kernel void @ds_swizzle(ptr addrspace(1) %out, i32 %src) nounwind {
%swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
store i32 %swizzle, i32 addrspace(1)* %out, align 4
store i32 %swizzle, ptr addrspace(1) %out, align 4
ret void
}

Expand Down
8 changes: 3 additions & 5 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.exp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -629,12 +629,10 @@ define amdgpu_kernel void @test_export_across_store_load(i32 %idx, float %v) #0
%data0 = alloca <4 x float>, align 8, addrspace(5)
%data1 = alloca <4 x float>, align 8, addrspace(5)
%cmp = icmp eq i32 %idx, 1
%data = select i1 %cmp, <4 x float> addrspace(5)* %data0, <4 x float> addrspace(5)* %data1
%sptr = getelementptr inbounds <4 x float>, <4 x float> addrspace(5)* %data, i32 0, i32 0
store float %v, float addrspace(5)* %sptr, align 8
%data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
store float %v, ptr addrspace(5) %data, align 8
call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
%ptr0 = getelementptr inbounds <4 x float>, <4 x float> addrspace(5)* %data0, i32 0, i32 0
%load0 = load float, float addrspace(5)* %ptr0, align 8
%load0 = load float, ptr addrspace(5) %data0, align 8
call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
ret void
Expand Down
168 changes: 84 additions & 84 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w32.ll

Large diffs are not rendered by default.

168 changes: 84 additions & 84 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdiv.fast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ declare float @llvm.amdgcn.fdiv.fast(float, float) #0
; CHECK: v_rcp_f32_e32
; CHECK: v_mul_f32_e32
; CHECK: v_mul_f32_e32
define amdgpu_kernel void @test_fdiv_fast(float addrspace(1)* %out, float %a, float %b) #1 {
define amdgpu_kernel void @test_fdiv_fast(ptr addrspace(1) %out, float %a, float %b) #1 {
%fdiv = call float @llvm.amdgcn.fdiv.fast(float %a, float %b)
store float %fdiv, float addrspace(1)* %out
store float %fdiv, ptr addrspace(1) %out
ret void
}

Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.bf16.bf16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_bf16_bf16(
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
i16 addrspace(1)* %r,
<2 x i16> addrspace(1)* %a,
<2 x i16> addrspace(1)* %b,
i16 addrspace(1)* %c) {
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
ptr addrspace(1) %c) {
entry:
%a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
%b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b
%c.val = load i16, i16 addrspace(1)* %c
%a.val = load <2 x i16>, ptr addrspace(1) %a
%b.val = load <2 x i16>, ptr addrspace(1) %b
%c.val = load i16, ptr addrspace(1) %c
%r.val = call i16 @llvm.amdgcn.fdot2.bf16.bf16(<2 x i16> %a.val, <2 x i16> %b.val, i16 %c.val)
store i16 %r.val, i16 addrspace(1)* %r
store i16 %r.val, ptr addrspace(1) %r
ret void
}

Expand Down Expand Up @@ -57,19 +57,19 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_bf16_bf16_dpp(
; GISEL-GFX11-NEXT: scratch_store_b16 off, v0, s0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
i16 addrspace(5)* %r,
<2 x i16> addrspace(5)* %a,
<2 x i16> addrspace(5)* %b,
i16 addrspace(5)* %c) {
ptr addrspace(5) %r,
ptr addrspace(5) %a,
ptr addrspace(5) %b,
ptr addrspace(5) %c) {
entry:
%a.val = load <2 x i16>, <2 x i16> addrspace(5)* %a
%b.val = load <2 x i16>, <2 x i16> addrspace(5)* %b
%c.val = load i16, i16 addrspace(5)* %c
%a.val = load <2 x i16>, ptr addrspace(5) %a
%b.val = load <2 x i16>, ptr addrspace(5) %b
%c.val = load i16, ptr addrspace(5) %c
%a.val.i32 = bitcast <2 x i16> %a.val to i32
%dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 %a.val.i32, i32 %a.val.i32, i32 1, i32 15, i32 15, i1 1)
%a.val.dpp.v2i16 = bitcast i32 %dpp to <2 x i16>
%r.val = call i16 @llvm.amdgcn.fdot2.bf16.bf16(<2 x i16> %a.val.dpp.v2i16, <2 x i16> %b.val, i16 %c.val)
store i16 %r.val, i16 addrspace(5)* %r
store i16 %r.val, ptr addrspace(5) %r
ret void
}

Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f16.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f16_f16(
; GFX11-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
half addrspace(1)* %r,
<2 x half> addrspace(1)* %a,
<2 x half> addrspace(1)* %b,
half addrspace(1)* %c) {
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
ptr addrspace(1) %c) {
entry:
%a.val = load <2 x half>, <2 x half> addrspace(1)* %a
%b.val = load <2 x half>, <2 x half> addrspace(1)* %b
%c.val = load half, half addrspace(1)* %c
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
%c.val = load half, ptr addrspace(1) %c
%r.val = call half @llvm.amdgcn.fdot2.f16.f16(<2 x half> %a.val, <2 x half> %b.val, half %c.val)
store half %r.val, half addrspace(1)* %r
store half %r.val, ptr addrspace(1) %r
ret void
}

Expand Down Expand Up @@ -57,19 +57,19 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f16_f16_dpp(
; GISEL-GFX11-NEXT: scratch_store_b16 off, v0, s0
; GISEL-GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GISEL-GFX11-NEXT: s_endpgm
half addrspace(5)* %r,
<2 x half> addrspace(5)* %a,
<2 x half> addrspace(5)* %b,
half addrspace(5)* %c) {
ptr addrspace(5) %r,
ptr addrspace(5) %a,
ptr addrspace(5) %b,
ptr addrspace(5) %c) {
entry:
%a.val = load <2 x half>, <2 x half> addrspace(5)* %a
%b.val = load <2 x half>, <2 x half> addrspace(5)* %b
%c.val = load half, half addrspace(5)* %c
%a.val = load <2 x half>, ptr addrspace(5) %a
%b.val = load <2 x half>, ptr addrspace(5) %b
%c.val = load half, ptr addrspace(5) %c
%a.val.i32 = bitcast <2 x half> %a.val to i32
%dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 %a.val.i32, i32 %a.val.i32, i32 1, i32 15, i32 15, i1 1)
%a.val.dpp.v2half = bitcast i32 %dpp to <2 x half>
%r.val = call half @llvm.amdgcn.fdot2.f16.f16(<2 x half> %a.val.dpp.v2half, <2 x half> %b.val, half %c.val)
store half %r.val, half addrspace(5)* %r
store half %r.val, ptr addrspace(5) %r
ret void
}

Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.f32.bf16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f32_bf16_clamp(
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
float addrspace(1)* %r,
<2 x i16> addrspace(1)* %a,
<2 x i16> addrspace(1)* %b,
float addrspace(1)* %c) {
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
ptr addrspace(1) %c) {
entry:
%a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
%b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b
%c.val = load float, float addrspace(1)* %c
%a.val = load <2 x i16>, ptr addrspace(1) %a
%b.val = load <2 x i16>, ptr addrspace(1) %b
%c.val = load float, ptr addrspace(1) %c
%r.val = call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %a.val, <2 x i16> %b.val, float %c.val, i1 1)
store float %r.val, float addrspace(1)* %r
store float %r.val, ptr addrspace(1) %r
ret void
}

Expand All @@ -48,15 +48,15 @@ define amdgpu_kernel void @test_llvm_amdgcn_fdot2_f32_bf16_no_clamp(
; GFX11-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
float addrspace(1)* %r,
<2 x i16> addrspace(1)* %a,
<2 x i16> addrspace(1)* %b,
float addrspace(1)* %c) {
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
ptr addrspace(1) %c) {
entry:
%a.val = load <2 x i16>, <2 x i16> addrspace(1)* %a
%b.val = load <2 x i16>, <2 x i16> addrspace(1)* %b
%c.val = load float, float addrspace(1)* %c
%a.val = load <2 x i16>, ptr addrspace(1) %a
%b.val = load <2 x i16>, ptr addrspace(1) %b
%c.val = load float, ptr addrspace(1) %c
%r.val = call float @llvm.amdgcn.fdot2.f32.bf16(<2 x i16> %a.val, <2 x i16> %b.val, float %c.val, i1 0)
store float %r.val, float addrspace(1)* %r
store float %r.val, ptr addrspace(1) %r
ret void
}
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fdot2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@ declare float @llvm.amdgcn.fdot2(<2 x half> %a, <2 x half> %b, float %c, i1 %cla
; GFX9: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
; GFX10: v_dot2_f32_f16 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} clamp{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_fdot2_clamp(
float addrspace(1)* %r,
<2 x half> addrspace(1)* %a,
<2 x half> addrspace(1)* %b,
float addrspace(1)* %c) {
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
ptr addrspace(1) %c) {
entry:
%a.val = load <2 x half>, <2 x half> addrspace(1)* %a
%b.val = load <2 x half>, <2 x half> addrspace(1)* %b
%c.val = load float, float addrspace(1)* %c
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
%c.val = load float, ptr addrspace(1) %c
%r.val = call float @llvm.amdgcn.fdot2(<2 x half> %a.val, <2 x half> %b.val, float %c.val, i1 1)
store float %r.val, float addrspace(1)* %r
store float %r.val, ptr addrspace(1) %r
ret void
}

Expand All @@ -28,16 +28,16 @@ entry:
; GFX940: v_dot2c_f32_f16_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
; GFX10: {{v_dot2c_f32_f16_e32|v_dot2acc_f32_f16}} v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}{{$}}
define amdgpu_kernel void @test_llvm_amdgcn_fdot2_no_clamp(
float addrspace(1)* %r,
<2 x half> addrspace(1)* %a,
<2 x half> addrspace(1)* %b,
float addrspace(1)* %c) {
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
ptr addrspace(1) %c) {
entry:
%a.val = load <2 x half>, <2 x half> addrspace(1)* %a
%b.val = load <2 x half>, <2 x half> addrspace(1)* %b
%c.val = load float, float addrspace(1)* %c
%a.val = load <2 x half>, ptr addrspace(1) %a
%b.val = load <2 x half>, ptr addrspace(1) %b
%c.val = load float, ptr addrspace(1) %c
%r.val = call float @llvm.amdgcn.fdot2(<2 x half> %a.val, <2 x half> %b.val, float %c.val, i1 0)
store float %r.val, float addrspace(1)* %r
store float %r.val, ptr addrspace(1) %r
ret void
}

Expand Down
100 changes: 50 additions & 50 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,106 +7,106 @@ declare half @llvm.amdgcn.fmad.ftz.f16(half %a, half %b, half %c)
; GCN-LABEL: {{^}}mad_f16:
; GCN: v_mac_f16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+$}}
define amdgpu_kernel void @mad_f16(
half addrspace(1)* %r,
half addrspace(1)* %a,
half addrspace(1)* %b,
half addrspace(1)* %c) {
%a.val = load half, half addrspace(1)* %a
%b.val = load half, half addrspace(1)* %b
%c.val = load half, half addrspace(1)* %c
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
ptr addrspace(1) %c) {
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
%c.val = load half, ptr addrspace(1) %c
%r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %b.val, half %c.val)
store half %r.val, half addrspace(1)* %r
store half %r.val, ptr addrspace(1) %r
ret void
}

; GCN-LABEL: {{^}}mad_f16_imm_a:
; GCN: v_madmk_f16 {{v[0-9]+}}, {{v[0-9]+}}, 0x4800, {{v[0-9]+}}
define amdgpu_kernel void @mad_f16_imm_a(
half addrspace(1)* %r,
half addrspace(1)* %b,
half addrspace(1)* %c) {
%b.val = load half, half addrspace(1)* %b
%c.val = load half, half addrspace(1)* %c
ptr addrspace(1) %r,
ptr addrspace(1) %b,
ptr addrspace(1) %c) {
%b.val = load half, ptr addrspace(1) %b
%c.val = load half, ptr addrspace(1) %c
%r.val = call half @llvm.amdgcn.fmad.ftz.f16(half 8.0, half %b.val, half %c.val)
store half %r.val, half addrspace(1)* %r
store half %r.val, ptr addrspace(1) %r
ret void
}

; GCN-LABEL: {{^}}mad_f16_imm_b:
; GCN: v_mac_f16_e32 {{v[0-9]+}}, 0x4800, {{v[0-9]+$}}
define amdgpu_kernel void @mad_f16_imm_b(
half addrspace(1)* %r,
half addrspace(1)* %a,
half addrspace(1)* %c) {
%a.val = load half, half addrspace(1)* %a
%c.val = load half, half addrspace(1)* %c
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %c) {
%a.val = load half, ptr addrspace(1) %a
%c.val = load half, ptr addrspace(1) %c
%r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half 8.0, half %c.val)
store half %r.val, half addrspace(1)* %r
store half %r.val, ptr addrspace(1) %r
ret void
}

; GCN-LABEL: {{^}}mad_f16_imm_c:
; GCN: v_madak_f16 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 0x4800{{$}}
define amdgpu_kernel void @mad_f16_imm_c(
half addrspace(1)* %r,
half addrspace(1)* %a,
half addrspace(1)* %b) {
%a.val = load half, half addrspace(1)* %a
%b.val = load half, half addrspace(1)* %b
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b) {
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
%r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %b.val, half 8.0)
store half %r.val, half addrspace(1)* %r
store half %r.val, ptr addrspace(1) %r
ret void
}

; GCN-LABEL: {{^}}mad_f16_neg_b:
; GFX8: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
; GFX9: v_mad_legacy_f16 v{{[0-9]+}}, v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_kernel void @mad_f16_neg_b(
half addrspace(1)* %r,
half addrspace(1)* %a,
half addrspace(1)* %b,
half addrspace(1)* %c) {
%a.val = load half, half addrspace(1)* %a
%b.val = load half, half addrspace(1)* %b
%c.val = load half, half addrspace(1)* %c
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
ptr addrspace(1) %c) {
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
%c.val = load half, ptr addrspace(1) %c
%neg.b = fsub half -0.0, %b.val
%r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %neg.b, half %c.val)
store half %r.val, half addrspace(1)* %r
store half %r.val, ptr addrspace(1) %r
ret void
}

; GCN-LABEL: {{^}}mad_f16_abs_b:
; GFX8: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}}
; GFX9: v_mad_legacy_f16 v{{[0-9]+}}, v{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}}
define amdgpu_kernel void @mad_f16_abs_b(
half addrspace(1)* %r,
half addrspace(1)* %a,
half addrspace(1)* %b,
half addrspace(1)* %c) {
%a.val = load half, half addrspace(1)* %a
%b.val = load half, half addrspace(1)* %b
%c.val = load half, half addrspace(1)* %c
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
ptr addrspace(1) %c) {
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
%c.val = load half, ptr addrspace(1) %c
%abs.b = call half @llvm.fabs.f16(half %b.val)
%r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %abs.b, half %c.val)
store half %r.val, half addrspace(1)* %r
store half %r.val, ptr addrspace(1) %r
ret void
}

; GCN-LABEL: {{^}}mad_f16_neg_abs_b:
; GFX8: v_mad_f16 v{{[0-9]+}}, v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}}
; GFX9: v_mad_legacy_f16 v{{[0-9]+}}, v{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}}
define amdgpu_kernel void @mad_f16_neg_abs_b(
half addrspace(1)* %r,
half addrspace(1)* %a,
half addrspace(1)* %b,
half addrspace(1)* %c) {
%a.val = load half, half addrspace(1)* %a
%b.val = load half, half addrspace(1)* %b
%c.val = load half, half addrspace(1)* %c
ptr addrspace(1) %r,
ptr addrspace(1) %a,
ptr addrspace(1) %b,
ptr addrspace(1) %c) {
%a.val = load half, ptr addrspace(1) %a
%b.val = load half, ptr addrspace(1) %b
%c.val = load half, ptr addrspace(1) %c
%abs.b = call half @llvm.fabs.f16(half %b.val)
%neg.abs.b = fsub half -0.0, %abs.b
%r.val = call half @llvm.amdgcn.fmad.ftz.f16(half %a.val, half %neg.abs.b, half %c.val)
store half %r.val, half addrspace(1)* %r
store half %r.val, ptr addrspace(1) %r
ret void
}

Expand Down
100 changes: 50 additions & 50 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,42 +8,42 @@ declare float @llvm.amdgcn.fmad.ftz.f32(float %a, float %b, float %c)
; GCN-LABEL: {{^}}mad_f32:
; GCN: v_ma{{[dc]}}_f32
; Baseline case: loads %a, %b and %c as float, calls
; llvm.amdgcn.fmad.ftz.f32 on them unmodified, and stores the result to %r.
define amdgpu_kernel void @mad_f32(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) {
  %a.val = load float, ptr addrspace(1) %a
  %b.val = load float, ptr addrspace(1) %b
  %c.val = load float, ptr addrspace(1) %c
  %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %b.val, float %c.val)
  store float %r.val, ptr addrspace(1) %r
  ret void
}

; GCN-LABEL: {{^}}mad_f32_imm_a:
; GCN: v_madmk_f32 {{v[0-9]+}}, {{v[0-9]+}}, 0x41000000,
; The first multiplicand is the constant 8.0; %b and %c are loaded. The check
; line above expects the constant as the literal 0x41000000 in v_madmk_f32.
define amdgpu_kernel void @mad_f32_imm_a(
    ptr addrspace(1) %r,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) {
  %b.val = load float, ptr addrspace(1) %b
  %c.val = load float, ptr addrspace(1) %c
  %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float 8.0, float %b.val, float %c.val)
  store float %r.val, ptr addrspace(1) %r
  ret void
}

; GCN-LABEL: {{^}}mad_f32_imm_b:
; GCN: v_mov_b32_e32 [[KB:v[0-9]+]], 0x41000000
; GCN: v_mac_f32_e32 {{v[0-9]+}}, {{[s][0-9]+}}, [[KB]]
; The second multiplicand is the constant 8.0; %a and %c are loaded. The check
; lines above expect 0x41000000 to be moved into a VGPR that feeds v_mac_f32.
define amdgpu_kernel void @mad_f32_imm_b(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %c) {
  %a.val = load float, ptr addrspace(1) %a
  %c.val = load float, ptr addrspace(1) %c
  %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float 8.0, float %c.val)
  store float %r.val, ptr addrspace(1) %r
  ret void
}

Expand All @@ -54,62 +54,62 @@ define amdgpu_kernel void @mad_f32_imm_b(
; GCN: v_mov_b32_e32 [[VB:v[0-9]+]], [[B]]
; GCN: v_mac_f32_e32 [[C]], {{s[0-9]+}}, [[VB]]{{$}}
; The addend is the constant 8.0; %a and %b are loaded and multiplied by
; llvm.amdgcn.fmad.ftz.f32. The result is stored to %r.
define amdgpu_kernel void @mad_f32_imm_c(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b) {
  %a.val = load float, ptr addrspace(1) %a
  %b.val = load float, ptr addrspace(1) %b
  %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %b.val, float 8.0)
  store float %r.val, ptr addrspace(1) %r
  ret void
}

; GCN-LABEL: {{^}}mad_f32_neg_b:
; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}
; Negates the loaded %b with fneg before the llvm.amdgcn.fmad.ftz.f32 call.
; The check line above expects the negation as a source modifier on v_mad_f32.
define amdgpu_kernel void @mad_f32_neg_b(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) {
  %a.val = load float, ptr addrspace(1) %a
  %b.val = load float, ptr addrspace(1) %b
  %c.val = load float, ptr addrspace(1) %c
  %neg.b = fneg float %b.val
  %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.b, float %c.val)
  store float %r.val, ptr addrspace(1) %r
  ret void
}

; GCN-LABEL: {{^}}mad_f32_abs_b:
; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, |v{{[0-9]+}}|, v{{[0-9]+}}
; Applies llvm.fabs.f32 to the loaded %b before the fmad.ftz call. The check
; line above expects the fabs as a |...| source modifier on v_mad_f32.
define amdgpu_kernel void @mad_f32_abs_b(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) {
  %a.val = load float, ptr addrspace(1) %a
  %b.val = load float, ptr addrspace(1) %b
  %c.val = load float, ptr addrspace(1) %c
  %abs.b = call float @llvm.fabs.f32(float %b.val)
  %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %abs.b, float %c.val)
  store float %r.val, ptr addrspace(1) %r
  ret void
}

; GCN-LABEL: {{^}}mad_f32_neg_abs_b:
; GCN: v_mad_f32 v{{[0-9]+}}, s{{[0-9]+}}, -|v{{[0-9]+}}|, v{{[0-9]+}}
; Computes fneg(fabs(%b)) on the loaded value before the fmad.ftz call. The
; check line above expects a combined -|...| source modifier on v_mad_f32.
define amdgpu_kernel void @mad_f32_neg_abs_b(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b,
    ptr addrspace(1) %c) {
  %a.val = load float, ptr addrspace(1) %a
  %b.val = load float, ptr addrspace(1) %b
  %c.val = load float, ptr addrspace(1) %c
  %abs.b = call float @llvm.fabs.f32(float %b.val)
  %neg.abs.b = fneg float %abs.b
  %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.abs.b, float %c.val)
  store float %r.val, ptr addrspace(1) %r
  ret void
}

Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@

; GCN-LABEL: {{^}}test_fmed3_f16:
; GCN: v_med3_f16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; Each i32 argument is truncated to i16 and bitcast to half, then the three
; halves are passed to llvm.amdgcn.fmed3.f16 and the result is stored to %out.
define amdgpu_kernel void @test_fmed3_f16(ptr addrspace(1) %out, i32 %src0.arg, i32 %src1.arg, i32 %src2.arg) #1 {
  %src0.f16 = trunc i32 %src0.arg to i16
  %src0 = bitcast i16 %src0.f16 to half
  %src1.f16 = trunc i32 %src1.arg to i16
  %src1 = bitcast i16 %src1.f16 to half
  %src2.f16 = trunc i32 %src2.arg to i16
  %src2 = bitcast i16 %src2.f16 to half
  %mad = call half @llvm.amdgcn.fmed3.f16(half %src0, half %src1, half %src2)
  store half %mad, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}test_fmed3_srcmods_f16:
; GCN: v_med3_f16 v{{[0-9]+}}, -s{{[0-9]+}}, |v{{[0-9]+}}|, -|v{{[0-9]+}}|
define amdgpu_kernel void @test_fmed3_srcmods_f16(half addrspace(1)* %out, i32 %src0.arg, i32 %src1.arg, i32 %src2.arg) #1 {
define amdgpu_kernel void @test_fmed3_srcmods_f16(ptr addrspace(1) %out, i32 %src0.arg, i32 %src1.arg, i32 %src2.arg) #1 {
%src0.f16 = trunc i32 %src0.arg to i16
%src0 = bitcast i16 %src0.f16 to half
%src1.f16 = trunc i32 %src1.arg to i16
Expand All @@ -28,7 +28,7 @@ define amdgpu_kernel void @test_fmed3_srcmods_f16(half addrspace(1)* %out, i32 %
%src2.fabs = call half @llvm.fabs.f16(half %src2)
%src2.fneg.fabs = fsub half -0.0, %src2.fabs
%mad = call half @llvm.amdgcn.fmed3.f16(half %src0.fneg, half %src1.fabs, half %src2.fneg.fabs)
store half %mad, half addrspace(1)* %out
store half %mad, ptr addrspace(1) %out
ret void
}

Expand Down
38 changes: 19 additions & 19 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,62 +3,62 @@

; GCN-LABEL: {{^}}test_fmed3:
; GCN: v_med3_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; Baseline case: calls llvm.amdgcn.fmed3.f32 on the three float arguments and
; stores the result to %out.
define amdgpu_kernel void @test_fmed3(ptr addrspace(1) %out, float %src0, float %src1, float %src2) #1 {
  %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2)
  store float %med3, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}test_fmed3_srcmods:
; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, |v{{[0-9]+}}|, -|v{{[0-9]+}}|
; Feeds fmed3 with -%src0, fabs(%src1) and -fabs(%src2); negations are written
; as fsub from -0.0. The check line above expects these as source modifiers
; (-s, |v|, -|v|) on v_med3_f32.
define amdgpu_kernel void @test_fmed3_srcmods(ptr addrspace(1) %out, float %src0, float %src1, float %src2) #1 {
  %src0.fneg = fsub float -0.0, %src0
  %src1.fabs = call float @llvm.fabs.f32(float %src1)
  %src2.fabs = call float @llvm.fabs.f32(float %src2)
  %src2.fneg.fabs = fsub float -0.0, %src2.fabs
  %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0.fneg, float %src1.fabs, float %src2.fneg.fabs)
  store float %med3, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}test_fneg_fmed3:
; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; Negates the fmed3 result (fsub from -0.0) and stores it. The check line
; above expects the negation to appear on all three v_med3_f32 sources instead.
define amdgpu_kernel void @test_fneg_fmed3(ptr addrspace(1) %out, float %src0, float %src1, float %src2) #1 {
  %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2)
  %neg.med3 = fsub float -0.0, %med3
  store float %neg.med3, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}test_fneg_fmed3_multi_use:
; GCN: v_med3_f32 [[MED3:v[0-9]+]], -s{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}}
; GCN: v_mul_f32_e32 v{{[0-9]+}}, -4.0, [[MED3]]
; The fmed3 result has two users: its negation and a multiply by 4.0. Both
; values are written to %out with volatile stores so neither is dropped.
; The check lines above expect a negated-source v_med3_f32 and a mul by -4.0.
define amdgpu_kernel void @test_fneg_fmed3_multi_use(ptr addrspace(1) %out, float %src0, float %src1, float %src2) #1 {
  %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2)
  %neg.med3 = fsub float -0.0, %med3
  %med3.user = fmul float %med3, 4.0
  store volatile float %med3.user, ptr addrspace(1) %out
  store volatile float %neg.med3, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}test_fabs_fmed3:
; GCN: v_med3_f32 [[MED3:v[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7fffffff, [[MED3]]
; Applies llvm.fabs.f32 to the fmed3 result. The check lines above expect an
; unmodified v_med3_f32 followed by a v_and_b32 with 0x7fffffff.
define amdgpu_kernel void @test_fabs_fmed3(ptr addrspace(1) %out, float %src0, float %src1, float %src2) #1 {
  %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2)
  %fabs.med3 = call float @llvm.fabs.f32(float %med3)
  store float %fabs.med3, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}test_fneg_fmed3_rr_0:
; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -v{{[0-9]+}}, [[NEG0]]
; fmed3 of two arguments and the constant 0.0, with the result negated (fsub
; from -0.0). The check lines above expect negated sources and a register
; built by v_bfrev_b32 of 1 as the third operand.
define amdgpu_kernel void @test_fneg_fmed3_rr_0(ptr addrspace(1) %out, float %src0, float %src1) #1 {
  %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float 0.0)
  %neg.med3 = fsub float -0.0, %med3
  store float %neg.med3, ptr addrspace(1) %out
  ret void
}

Expand All @@ -67,22 +67,22 @@ define amdgpu_kernel void @test_fneg_fmed3_rr_0(float addrspace(1)* %out, float
; GCN: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
; GCN: v_med3_f32 [[MED3:v[0-9]+]], -s{{[0-9]+}}, -v{{[0-9]+}}, [[NEG0]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[MED3]]
; Like the rr_0 case, but the negated fmed3 result is consumed by an fmul with
; %mul.arg before being stored. The check lines above expect the v_mul_f32 to
; use the med3 result without a negate modifier.
define amdgpu_kernel void @test_fneg_fmed3_rr_0_foldable_user(ptr addrspace(1) %out, float %src0, float %src1, float %mul.arg) #1 {
  %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float 0.0)
  %neg.med3 = fsub float -0.0, %med3
  %mul = fmul float %neg.med3, %mul.arg
  store float %mul, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}test_fneg_fmed3_r_inv2pi_0:
; GCN-DAG: v_bfrev_b32_e32 [[NEG0:v[0-9]+]], 1
; GCN-DAG: v_mov_b32_e32 [[NEG_INV:v[0-9]+]], 0xbe22f983
; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, [[NEG_INV]], [[NEG0]]
; fmed3 of %src0, the constant 0x3FC45F3060000000 and 0.0, with the result
; negated (fsub from -0.0). The check lines above expect the constants to be
; materialized as 0xbe22f983 and a v_bfrev_b32 of 1.
define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0(ptr addrspace(1) %out, float %src0) #1 {
  %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float 0x3FC45F3060000000, float 0.0)
  %neg.med3 = fsub float -0.0, %med3
  store float %neg.med3, ptr addrspace(1) %out
  ret void
}

Expand All @@ -91,11 +91,11 @@ define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0(float addrspace(1)* %out,
; GCN-DAG: v_mov_b32_e32 [[NEG_INV:v[0-9]+]], 0xbe22f983
; GCN: v_med3_f32 [[MED3:v[0-9]+]], -s{{[0-9]+}}, [[NEG_INV]], [[NEG0]]
; GCN: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[MED3]]
; Like the r_inv2pi_0 case, but the negated fmed3 result is consumed by an
; fmul with %mul.arg before being stored.
define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0_foldable_user(ptr addrspace(1) %out, float %src0, float %mul.arg) #1 {
  %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float 0x3FC45F3060000000, float 0.0)
  %neg.med3 = fsub float -0.0, %med3
  %mul = fmul float %neg.med3, %mul.arg
  store float %mul, ptr addrspace(1) %out
  ret void
}

Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmul.legacy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,38 +9,38 @@
; GCN-LABEL: {{^}}test_mul_legacy_f32:
; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; Baseline case: calls llvm.amdgcn.fmul.legacy on %a and %b and stores the
; result to %out with 4-byte alignment.
define amdgpu_kernel void @test_mul_legacy_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
  %result = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  store float %result, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}test_mul_legacy_undef0_f32:
; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; Passes undef as the first fmul.legacy operand. The check lines above still
; expect a legacy multiply instruction to be emitted.
define amdgpu_kernel void @test_mul_legacy_undef0_f32(ptr addrspace(1) %out, float %a) #0 {
  %result = call float @llvm.amdgcn.fmul.legacy(float undef, float %a)
  store float %result, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}test_mul_legacy_undef1_f32:
; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; Passes undef as the second fmul.legacy operand. The check lines above still
; expect a legacy multiply instruction to be emitted.
define amdgpu_kernel void @test_mul_legacy_undef1_f32(ptr addrspace(1) %out, float %a) #0 {
  %result = call float @llvm.amdgcn.fmul.legacy(float %a, float undef)
  store float %result, ptr addrspace(1) %out, align 4
  ret void
}

; GCN-LABEL: {{^}}test_mul_legacy_fabs_f32:
; GCN: v_mul_legacy_f32{{[_e3264]*}} v{{[0-9]+}}, |s{{[0-9]+}}|, |{{[sv][0-9]+}}|
; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, |s{{[0-9]+}}|, |s{{[0-9]+}}|
; Applies llvm.fabs.f32 to both operands before fmul.legacy. The check lines
; above expect |...| source modifiers on both multiply operands.
define amdgpu_kernel void @test_mul_legacy_fabs_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
  %a.fabs = call float @llvm.fabs.f32(float %a)
  %b.fabs = call float @llvm.fabs.f32(float %b)
  %result = call float @llvm.amdgcn.fmul.legacy(float %a.fabs, float %b.fabs)
  store float %result, ptr addrspace(1) %out, align 4
  ret void
}

Expand All @@ -50,10 +50,10 @@ define amdgpu_kernel void @test_mul_legacy_fabs_f32(float addrspace(1)* %out, fl
; GCN: v_add_f32_e{{(32|64)}} v{{[0-9]+}}, s{{[0-9]+}}, {{[sv][0-9]+}}
; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GFX11: v_dual_mov_b32 v{{[0-9]+}}, 0 :: v_dual_add_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; fmul.legacy of %a and %b followed by an fadd with %c, under attribute
; group #0. The check lines above expect a separate multiply and add.
define amdgpu_kernel void @test_add_mul_legacy_f32(ptr addrspace(1) %out, float %a, float %b, float %c) #0 {
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %add = fadd float %mul, %c
  store float %add, ptr addrspace(1) %out, align 4
  ret void
}

Expand All @@ -66,10 +66,10 @@ define amdgpu_kernel void @test_add_mul_legacy_f32(float addrspace(1)* %out, flo
; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
; GFX11: v_dual_mov_b32 v{{[0-9]+}}, 0 :: v_dual_add_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; fmul.legacy of %a and %b followed by an fadd with %c, under attribute
; group #2 (defined elsewhere in the file). The check lines above differ per
; target in whether the pair is combined or kept as separate mul and add.
define amdgpu_kernel void @test_mad_legacy_f32(ptr addrspace(1) %out, float %a, float %b, float %c) #2 {
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %add = fadd float %mul, %c
  store float %add, ptr addrspace(1) %out, align 4
  ret void
}

Expand All @@ -80,10 +80,10 @@ define amdgpu_kernel void @test_mad_legacy_f32(float addrspace(1)* %out, float %
; GFX101: v_mad_legacy_f32 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}}
; GFX103: v_mul_legacy_f32_e64 v{{[0-9]+}}, 0x41200000, s{{[0-9]+}}
; GFX103: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; fmul.legacy of %a and the constant 10.0 followed by an fadd with %c. The
; check lines above expect the constant as the literal 0x41200000.
define amdgpu_kernel void @test_mad_legacy_f32_imm(ptr addrspace(1) %out, float %a, float %c) #2 {
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float 10.0)
  %add = fadd float %mul, %c
  store float %add, ptr addrspace(1) %out, align 4
  ret void
}

Expand All @@ -93,12 +93,12 @@ define amdgpu_kernel void @test_mad_legacy_f32_imm(float addrspace(1)* %out, flo
; NOMADMACF32: v_add_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GFX11: v_mul_dx9_zero_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, -s{{[0-9]+}}
; GFX11: v_dual_mov_b32 v{{[0-9]+}}, 0 :: v_dual_add_f32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; Negates both operands with fneg before fmul.legacy, then adds %c. The GFX11
; check line above expects negate modifiers on both multiply sources.
define amdgpu_kernel void @test_mad_legacy_fneg_f32(ptr addrspace(1) %out, float %a, float %b, float %c) #2 {
  %a.fneg = fneg float %a
  %b.fneg = fneg float %b
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a.fneg, float %b.fneg)
  %add = fadd float %mul, %c
  store float %add, ptr addrspace(1) %out, align 4
  ret void
}

Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ declare half @llvm.amdgcn.fract.f16(half %a)
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
; Loads a half from %a, applies llvm.amdgcn.fract.f16, and stores the result
; to %r. The check lines above expect the result to be written with
; buffer_store_short.
define amdgpu_kernel void @fract_f16(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a) {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %r.val = call half @llvm.amdgcn.fract.f16(half %a.val)
  store half %r.val, ptr addrspace(1) %r
  ret void
}
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,26 @@ declare double @llvm.amdgcn.fract.f64(double) #0

; GCN-LABEL: {{^}}v_fract_f32:
; GCN: v_fract_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
; Calls llvm.amdgcn.fract.f32 on the float argument and stores the result to
; %out.
define amdgpu_kernel void @v_fract_f32(ptr addrspace(1) %out, float %src) #1 {
  %fract = call float @llvm.amdgcn.fract.f32(float %src)
  store float %fract, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fract_f64:
; GCN: v_fract_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
; Calls llvm.amdgcn.fract.f64 on the double argument and stores the result to
; %out. The check line above expects 64-bit register pairs for both operands.
define amdgpu_kernel void @v_fract_f64(ptr addrspace(1) %out, double %src) #1 {
  %fract = call double @llvm.amdgcn.fract.f64(double %src)
  store double %fract, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}v_fract_undef_f32:
; GCN-NOT: v_fract_f32
; GCN-NOT: store_dword
; Calls llvm.amdgcn.fract.f32 on undef and stores the result. The negative
; check lines above expect that no v_fract_f32 instruction is emitted.
define amdgpu_kernel void @v_fract_undef_f32(ptr addrspace(1) %out) #1 {
  %fract = call float @llvm.amdgcn.fract.f32(float undef)
  store float %fract, ptr addrspace(1) %out
  ret void
}

Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ declare i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a)
; VI: v_frexp_exp_i16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]]
; GCN: buffer_store_short v[[R_I16]]
; Loads a half from %a, calls llvm.amdgcn.frexp.exp.i16.f16, and stores the
; i16 result to %r. The check lines above expect a buffer_store_short of the
; instruction result.
define amdgpu_kernel void @frexp_exp_f16(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a) {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %r.val = call i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a.val)
  store i16 %r.val, ptr addrspace(1) %r
  ret void
}

Expand All @@ -22,13 +22,13 @@ entry:
; VI: v_bfe_i32 v[[R_I32:[0-9]+]], v[[R_I16]], 0, 16{{$}}
; GCN: buffer_store_dword v[[R_I32]]
; Sign-extends the i16 frexp.exp result to i32 before storing it to %r. The
; check lines above expect the extension as a 16-bit v_bfe_i32 followed by a
; dword store.
define amdgpu_kernel void @frexp_exp_f16_sext(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a) {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %r.val = call i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a.val)
  %r.val.sext = sext i16 %r.val to i32
  store i32 %r.val.sext, ptr addrspace(1) %r
  ret void
}

Expand All @@ -37,12 +37,12 @@ entry:
; VI: v_frexp_exp_i16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]]
; GCN: buffer_store_dword v[[R_I16]]
; Zero-extends the i16 frexp.exp result to i32 before storing it to %r. The
; check lines above expect the instruction result register to be stored
; directly with a dword store.
define amdgpu_kernel void @frexp_exp_f16_zext(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a) {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %r.val = call i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a.val)
  %r.val.zext = zext i16 %r.val to i32
  store i32 %r.val.zext, ptr addrspace(1) %r
  ret void
}
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.exp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,55 +8,55 @@ declare i32 @llvm.amdgcn.frexp.exp.i32.f64(double) #0

; GCN-LABEL: {{^}}s_test_frexp_exp_f32:
; GCN: v_frexp_exp_i32_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
; Baseline f32 case: calls llvm.amdgcn.frexp.exp.i32.f32 on %src and stores
; the i32 result to %out.
define amdgpu_kernel void @s_test_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 {
  %frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %src)
  store i32 %frexp.exp, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f32:
; GCN: v_frexp_exp_i32_f32_e64 {{v[0-9]+}}, |{{s[0-9]+}}|
; Applies llvm.fabs.f32 to %src before frexp.exp. The check line above
; expects the fabs as a |...| source modifier on the e64 encoding.
define amdgpu_kernel void @s_test_fabs_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 {
  %fabs.src = call float @llvm.fabs.f32(float %src)
  %frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %fabs.src)
  store i32 %frexp.exp, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f32:
; GCN: v_frexp_exp_i32_f32_e64 {{v[0-9]+}}, -|{{s[0-9]+}}|
; Computes -fabs(%src) (negation written as fsub from -0.0) before frexp.exp.
; The check line above expects a -|...| source modifier.
define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f32(ptr addrspace(1) %out, float %src) #1 {
  %fabs.src = call float @llvm.fabs.f32(float %src)
  %fneg.fabs.src = fsub float -0.0, %fabs.src
  %frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f32(float %fneg.fabs.src)
  store i32 %frexp.exp, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}s_test_frexp_exp_f64:
; GCN: v_frexp_exp_i32_f64_e32 {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}
; Baseline f64 case: calls llvm.amdgcn.frexp.exp.i32.f64 on %src and stores
; the i32 result to %out.
define amdgpu_kernel void @s_test_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 {
  %frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double %src)
  store i32 %frexp.exp, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}s_test_fabs_frexp_exp_f64:
; GCN: v_frexp_exp_i32_f64_e64 {{v[0-9]+}}, |{{s\[[0-9]+:[0-9]+\]}}|
; Applies llvm.fabs.f64 to %src before frexp.exp. The check line above
; expects the fabs as a |...| modifier on the 64-bit source register pair.
define amdgpu_kernel void @s_test_fabs_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 {
  %fabs.src = call double @llvm.fabs.f64(double %src)
  %frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double %fabs.src)
  store i32 %frexp.exp, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_exp_f64:
; GCN: v_frexp_exp_i32_f64_e64 {{v[0-9]+}}, -|{{s\[[0-9]+:[0-9]+\]}}|
; Computes -fabs(%src) (negation written as fsub from -0.0) before frexp.exp
; on a double. The check line above expects a -|...| source modifier.
define amdgpu_kernel void @s_test_fneg_fabs_frexp_exp_f64(ptr addrspace(1) %out, double %src) #1 {
  %fabs.src = call double @llvm.fabs.f64(double %src)
  %fneg.fabs.src = fsub double -0.0, %fabs.src
  %frexp.exp = call i32 @llvm.amdgcn.frexp.exp.i32.f64(double %fneg.fabs.src)
  store i32 %frexp.exp, ptr addrspace(1) %out
  ret void
}

Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ declare half @llvm.amdgcn.frexp.mant.f16(half %a)
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
; Loads a half from %a, applies llvm.amdgcn.frexp.mant.f16, and stores the
; result to %r. The check lines above expect a buffer_store_short of the
; result.
define amdgpu_kernel void @frexp_mant_f16(
    ptr addrspace(1) %r,
    ptr addrspace(1) %a) {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %r.val = call half @llvm.amdgcn.frexp.mant.f16(half %a.val)
  store half %r.val, ptr addrspace(1) %r
  ret void
}
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.frexp.mant.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,55 +8,55 @@ declare double @llvm.amdgcn.frexp.mant.f64(double) #0

; GCN-LABEL: {{^}}s_test_frexp_mant_f32:
; GCN: v_frexp_mant_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}
; Baseline f32 case: calls llvm.amdgcn.frexp.mant.f32 on %src and stores the
; result to %out.
define amdgpu_kernel void @s_test_frexp_mant_f32(ptr addrspace(1) %out, float %src) #1 {
  %frexp.mant = call float @llvm.amdgcn.frexp.mant.f32(float %src)
  store float %frexp.mant, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}s_test_fabs_frexp_mant_f32:
; GCN: v_frexp_mant_f32_e64 {{v[0-9]+}}, |{{s[0-9]+}}|
; Applies llvm.fabs.f32 to %src before frexp.mant. The check line above
; expects the fabs as a |...| source modifier on the e64 encoding.
define amdgpu_kernel void @s_test_fabs_frexp_mant_f32(ptr addrspace(1) %out, float %src) #1 {
  %fabs.src = call float @llvm.fabs.f32(float %src)
  %frexp.mant = call float @llvm.amdgcn.frexp.mant.f32(float %fabs.src)
  store float %frexp.mant, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_mant_f32:
; GCN: v_frexp_mant_f32_e64 {{v[0-9]+}}, -|{{s[0-9]+}}|
; Computes -fabs(%src) (negation written as fsub from -0.0) before
; frexp.mant. The check line above expects a -|...| source modifier.
define amdgpu_kernel void @s_test_fneg_fabs_frexp_mant_f32(ptr addrspace(1) %out, float %src) #1 {
  %fabs.src = call float @llvm.fabs.f32(float %src)
  %fneg.fabs.src = fsub float -0.0, %fabs.src
  %frexp.mant = call float @llvm.amdgcn.frexp.mant.f32(float %fneg.fabs.src)
  store float %frexp.mant, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}s_test_frexp_mant_f64:
; GCN: v_frexp_mant_f64_e32 {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}
; Baseline f64 case: calls llvm.amdgcn.frexp.mant.f64 on %src and stores the
; result to %out.
define amdgpu_kernel void @s_test_frexp_mant_f64(ptr addrspace(1) %out, double %src) #1 {
  %frexp.mant = call double @llvm.amdgcn.frexp.mant.f64(double %src)
  store double %frexp.mant, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}s_test_fabs_frexp_mant_f64:
; GCN: v_frexp_mant_f64_e64 {{v\[[0-9]+:[0-9]+\]}}, |{{s\[[0-9]+:[0-9]+\]}}|
; Applies llvm.fabs.f64 to %src before frexp.mant. The check line above
; expects the fabs as a |...| modifier on the 64-bit source register pair.
define amdgpu_kernel void @s_test_fabs_frexp_mant_f64(ptr addrspace(1) %out, double %src) #1 {
  %fabs.src = call double @llvm.fabs.f64(double %src)
  %frexp.mant = call double @llvm.amdgcn.frexp.mant.f64(double %fabs.src)
  store double %frexp.mant, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}s_test_fneg_fabs_frexp_mant_f64:
; GCN: v_frexp_mant_f64_e64 {{v\[[0-9]+:[0-9]+\]}}, -|{{s\[[0-9]+:[0-9]+\]}}|
; Computes -fabs(%src) (negation written as fsub from -0.0) before
; frexp.mant on a double. The check line above expects a -|...| modifier.
define amdgpu_kernel void @s_test_fneg_fabs_frexp_mant_f64(ptr addrspace(1) %out, double %src) #1 {
  %fabs.src = call double @llvm.fabs.f64(double %src)
  %fneg.fabs.src = fsub double -0.0, %fabs.src
  %frexp.mant = call double @llvm.amdgcn.frexp.mant.f64(double %fneg.fabs.src)
  store double %frexp.mant, ptr addrspace(1) %out
  ret void
}

Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.global.load.lds.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX10
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s --check-prefix=GFX900-GISEL

; Intrinsic declaration used by the tests below: takes an addrspace(1) source
; pointer, an addrspace(3) destination pointer, and i32 size, offset and aux
; operands.
declare void @llvm.amdgcn.global.load.lds(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr, i32 %size, i32 %offset, i32 %aux)

define amdgpu_ps void @global_load_lds_dword_vaddr(i8 addrspace(1)* nocapture %gptr, i8 addrspace(3)* nocapture %lptr) {
define amdgpu_ps void @global_load_lds_dword_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) {
; GFX900-LABEL: global_load_lds_dword_vaddr:
; GFX900: ; %bb.0: ; %main_body
; GFX900-NEXT: v_readfirstlane_b32 s0, v2
Expand Down Expand Up @@ -46,11 +46,11 @@ define amdgpu_ps void @global_load_lds_dword_vaddr(i8 addrspace(1)* nocapture %g
; GFX900-GISEL-NEXT: global_load_dword v[0:1], off offset:16 glc lds
; GFX900-GISEL-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 4, i32 16, i32 1)
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 16, i32 1)
ret void
}

define amdgpu_ps void @global_load_lds_dword_saddr(i8 addrspace(1)* nocapture inreg %gptr, i8 addrspace(3)* nocapture %lptr) {
define amdgpu_ps void @global_load_lds_dword_saddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr) {
; GFX900-LABEL: global_load_lds_dword_saddr:
; GFX900: ; %bb.0: ; %main_body
; GFX900-NEXT: v_readfirstlane_b32 s2, v0
Expand Down Expand Up @@ -94,11 +94,11 @@ define amdgpu_ps void @global_load_lds_dword_saddr(i8 addrspace(1)* nocapture in
; GFX900-GISEL-NEXT: global_load_dword v0, s[0:1] offset:32 slc lds
; GFX900-GISEL-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 4, i32 32, i32 2)
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 4, i32 32, i32 2)
ret void
}

define amdgpu_ps void @global_load_lds_dword_saddr_and_vaddr(i8 addrspace(1)* nocapture inreg %gptr, i8 addrspace(3)* nocapture %lptr, i32 %voffset) {
define amdgpu_ps void @global_load_lds_dword_saddr_and_vaddr(ptr addrspace(1) nocapture inreg %gptr, ptr addrspace(3) nocapture %lptr, i32 %voffset) {
; GFX900-LABEL: global_load_lds_dword_saddr_and_vaddr:
; GFX900: ; %bb.0: ; %main_body
; GFX900-NEXT: v_readfirstlane_b32 s2, v0
Expand Down Expand Up @@ -138,12 +138,12 @@ define amdgpu_ps void @global_load_lds_dword_saddr_and_vaddr(i8 addrspace(1)* no
; GFX900-GISEL-NEXT: s_endpgm
main_body:
%voffset.64 = zext i32 %voffset to i64
%gep = getelementptr i8, i8 addrspace(1)* %gptr, i64 %voffset.64
call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gep, i8 addrspace(3)* %lptr, i32 4, i32 48, i32 16)
%gep = getelementptr i8, ptr addrspace(1) %gptr, i64 %voffset.64
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gep, ptr addrspace(3) %lptr, i32 4, i32 48, i32 16)
ret void
}

define amdgpu_ps void @global_load_lds_ushort_vaddr(i8 addrspace(1)* nocapture %gptr, i8 addrspace(3)* nocapture %lptr) {
define amdgpu_ps void @global_load_lds_ushort_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) {
; GFX900-LABEL: global_load_lds_ushort_vaddr:
; GFX900: ; %bb.0: ; %main_body
; GFX900-NEXT: v_readfirstlane_b32 s0, v2
Expand Down Expand Up @@ -182,11 +182,11 @@ define amdgpu_ps void @global_load_lds_ushort_vaddr(i8 addrspace(1)* nocapture %
; GFX900-GISEL-NEXT: global_load_ushort v[0:1], off lds
; GFX900-GISEL-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 2, i32 0, i32 4)
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 2, i32 0, i32 4)
ret void
}

define amdgpu_ps void @global_load_lds_ubyte_vaddr(i8 addrspace(1)* nocapture %gptr, i8 addrspace(3)* nocapture %lptr) {
define amdgpu_ps void @global_load_lds_ubyte_vaddr(ptr addrspace(1) nocapture %gptr, ptr addrspace(3) nocapture %lptr) {
; GFX900-LABEL: global_load_lds_ubyte_vaddr:
; GFX900: ; %bb.0: ; %main_body
; GFX900-NEXT: v_readfirstlane_b32 s0, v2
Expand Down Expand Up @@ -225,6 +225,6 @@ define amdgpu_ps void @global_load_lds_ubyte_vaddr(i8 addrspace(1)* nocapture %g
; GFX900-GISEL-NEXT: global_load_ubyte v[0:1], off lds
; GFX900-GISEL-NEXT: s_endpgm
main_body:
call void @llvm.amdgcn.global.load.lds(i8 addrspace(1)* %gptr, i8 addrspace(3)* %lptr, i32 1, i32 0, i32 0)
call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) %gptr, ptr addrspace(3) %lptr, i32 1, i32 0, i32 0)
ret void
}
34 changes: 17 additions & 17 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.groupstaticsize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,40 +14,40 @@
; CHECK-LABEL: {{^}}groupstaticsize_test0:
; NOHSA: v_mov_b32_e32 v{{[0-9]+}}, llvm.amdgcn.groupstaticsize@abs32@lo
; HSA: v_mov_b32_e32 v{{[0-9]+}}, 0x800{{$}}
define amdgpu_kernel void @groupstaticsize_test0(float addrspace(1)* %out, i32 addrspace(1)* %lds_size) #0 {
define amdgpu_kernel void @groupstaticsize_test0(ptr addrspace(1) %out, ptr addrspace(1) %lds_size) #0 {
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 64
%static_lds_size = call i32 @llvm.amdgcn.groupstaticsize() #1
store i32 %static_lds_size, i32 addrspace(1)* %lds_size, align 4
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
store float %val0, float addrspace(1)* %out, align 4
store i32 %static_lds_size, ptr addrspace(1) %lds_size, align 4
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
store float %val0, ptr addrspace(1) %out, align 4

ret void
}

; CHECK-LABEL: {{^}}groupstaticsize_test1:
; NOHSA: v_mov_b32_e32 v{{[0-9]+}}, llvm.amdgcn.groupstaticsize@abs32@lo
; HSA: v_mov_b32_e32 v{{[0-9]+}}, 0xc00{{$}}
define amdgpu_kernel void @groupstaticsize_test1(float addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %lds_size) {
define amdgpu_kernel void @groupstaticsize_test1(ptr addrspace(1) %out, i32 %cond, ptr addrspace(1) %lds_size) {
entry:
%static_lds_size = call i32 @llvm.amdgcn.groupstaticsize() #1
store i32 %static_lds_size, i32 addrspace(1)* %lds_size, align 4
store i32 %static_lds_size, ptr addrspace(1) %lds_size, align 4
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%idx.0 = add nsw i32 %tid.x, 64
%tmp = icmp eq i32 %cond, 0
br i1 %tmp, label %if, label %else

if: ; preds = %entry
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
store float %val0, float addrspace(1)* %out, align 4
%arrayidx0 = getelementptr inbounds [512 x float], ptr addrspace(3) @lds0, i32 0, i32 %idx.0
%val0 = load float, ptr addrspace(3) %arrayidx0, align 4
store float %val0, ptr addrspace(1) %out, align 4
br label %endif

else: ; preds = %entry
%arrayidx1 = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
%val1 = load float, float addrspace(3)* %arrayidx1, align 4
store float %val1, float addrspace(1)* %out, align 4
%arrayidx1 = getelementptr inbounds [256 x float], ptr addrspace(3) @lds1, i32 0, i32 %idx.0
%val1 = load float, ptr addrspace(3) %arrayidx1, align 4
store float %val1, ptr addrspace(1) %out, align 4
br label %endif

endif: ; preds = %else, %if
Expand All @@ -58,11 +58,11 @@ endif: ; preds = %else, %if
; CHECK-LABEL: {{^}}large_groupstaticsize:
; NOHSA: v_mov_b32_e32 v{{[0-9]+}}, llvm.amdgcn.groupstaticsize@abs32@lo
; HSA: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4000{{$}}
define amdgpu_kernel void @large_groupstaticsize(i32 addrspace(1)* %size, i32 %idx) #0 {
%gep = getelementptr inbounds [4096 x i32], [4096 x i32] addrspace(3)* @large, i32 0, i32 %idx
store volatile i32 0, i32 addrspace(3)* %gep
define amdgpu_kernel void @large_groupstaticsize(ptr addrspace(1) %size, i32 %idx) #0 {
%gep = getelementptr inbounds [4096 x i32], ptr addrspace(3) @large, i32 0, i32 %idx
store volatile i32 0, ptr addrspace(3) %gep
%static_lds_size = call i32 @llvm.amdgcn.groupstaticsize()
store i32 %static_lds_size, i32 addrspace(1)* %size
store i32 %static_lds_size, ptr addrspace(1) %size
ret void
}

Expand Down
136 changes: 68 additions & 68 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w32.ll

Large diffs are not rendered by default.

136 changes: 68 additions & 68 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.icmp.w64.ll

Large diffs are not rendered by default.

42 changes: 21 additions & 21 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ entry:
ret void
}

define amdgpu_kernel void @test_iglp_opt_mfma_gemm(<32 x float> addrspace(3)* noalias %in, <32 x float> addrspace(3)* noalias %out) #0 {
define amdgpu_kernel void @test_iglp_opt_mfma_gemm(ptr addrspace(3) noalias %in, ptr addrspace(3) noalias %out) #0 {
; GCN-LABEL: test_iglp_opt_mfma_gemm:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
Expand Down Expand Up @@ -122,31 +122,31 @@ define amdgpu_kernel void @test_iglp_opt_mfma_gemm(<32 x float> addrspace(3)* no
entry:
call void @llvm.amdgcn.iglp.opt(i32 0)
%idx = call i32 @llvm.amdgcn.workitem.id.x()
%load.0.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %in, i32 %idx
%load.0 = load <32 x float>, <32 x float> addrspace(3)* %load.0.addr
%load.1.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.0.addr, i32 64
%load.1 = load <32 x float>, <32 x float> addrspace(3)* %load.1.addr
%load.2.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.1.addr, i32 128
%load.2 = load <32 x float>, <32 x float> addrspace(3)* %load.2.addr
%load.3.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.2.addr, i32 192
%load.3 = load <32 x float>, <32 x float> addrspace(3)* %load.3.addr
%load.4.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %load.3.addr, i32 256
%load.4 = load <32 x float>, <32 x float> addrspace(3)* %load.4.addr
%load.0.addr = getelementptr <32 x float>, ptr addrspace(3) %in, i32 %idx
%load.0 = load <32 x float>, ptr addrspace(3) %load.0.addr
%load.1.addr = getelementptr <32 x float>, ptr addrspace(3) %load.0.addr, i32 64
%load.1 = load <32 x float>, ptr addrspace(3) %load.1.addr
%load.2.addr = getelementptr <32 x float>, ptr addrspace(3) %load.1.addr, i32 128
%load.2 = load <32 x float>, ptr addrspace(3) %load.2.addr
%load.3.addr = getelementptr <32 x float>, ptr addrspace(3) %load.2.addr, i32 192
%load.3 = load <32 x float>, ptr addrspace(3) %load.3.addr
%load.4.addr = getelementptr <32 x float>, ptr addrspace(3) %load.3.addr, i32 256
%load.4 = load <32 x float>, ptr addrspace(3) %load.4.addr
%mai.0 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.0, i32 0, i32 0, i32 0)
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.1, i32 0, i32 0, i32 0)
%mai.2 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.2, i32 0, i32 0, i32 0)
%mai.3 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.3, i32 0, i32 0, i32 0)
%mai.4 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %load.4, i32 0, i32 0, i32 0)
%store.0.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 %idx
store <32 x float> %mai.0, <32 x float> addrspace(3)* %store.0.addr
%store.1.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 64
store <32 x float> %mai.1, <32 x float> addrspace(3)* %store.1.addr
%store.2.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 128
store <32 x float> %mai.2, <32 x float> addrspace(3)* %store.2.addr
%store.3.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 192
store <32 x float> %mai.3, <32 x float> addrspace(3)* %store.3.addr
%store.4.addr = getelementptr <32 x float>, <32 x float> addrspace(3)* %out, i32 256
store <32 x float> %mai.4, <32 x float> addrspace(3)* %store.4.addr
%store.0.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 %idx
store <32 x float> %mai.0, ptr addrspace(3) %store.0.addr
%store.1.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 64
store <32 x float> %mai.1, ptr addrspace(3) %store.1.addr
%store.2.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 128
store <32 x float> %mai.2, ptr addrspace(3) %store.2.addr
%store.3.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 192
store <32 x float> %mai.3, ptr addrspace(3) %store.3.addr
%store.4.addr = getelementptr <32 x float>, ptr addrspace(3) %out, i32 256
store <32 x float> %mai.4, ptr addrspace(3) %store.4.addr
ret void
}

Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ main_body:

; GCN-LABEL: {{^}}load_1d_lwe:
; GCN: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf unorm lwe{{$}}
define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
main_body:
%v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 2, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand Down Expand Up @@ -46,12 +46,12 @@ main_body:

; GCN-LABEL: {{^}}load_cube_lwe:
; GCN: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand All @@ -73,12 +73,12 @@ main_body:

; GCN-LABEL: {{^}}load_2darray_lwe:
; GCN: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand Down Expand Up @@ -269,13 +269,13 @@ main_body:

; GCN-LABEL: image_load_mmo
; GCN: image_load v1, v[{{[0-9:]+}}], s[0:7] dmask:0x1 unorm
define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
store float 0.000000e+00, float addrspace(3)* %lds
define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, ptr addrspace(3) %lds, <2 x i32> %c) #0 {
store float 0.000000e+00, ptr addrspace(3) %lds
%c0 = extractelement <2 x i32> %c, i32 0
%c1 = extractelement <2 x i32> %c, i32 1
%tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
%tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
store float 0.000000e+00, float addrspace(3)* %tmp2
%tmp2 = getelementptr float, ptr addrspace(3) %lds, i32 4
store float 0.000000e+00, ptr addrspace(3) %tmp2
ret float %tex
}

Expand Down
68 changes: 34 additions & 34 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ main_body:
ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
define amdgpu_ps <4 x float> @load_1d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
; VERDE-LABEL: load_1d_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
Expand Down Expand Up @@ -141,11 +141,11 @@ main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
; VERDE-LABEL: load_1d_lwe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
Expand Down Expand Up @@ -245,7 +245,7 @@ main_body:
%v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 2, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand Down Expand Up @@ -284,7 +284,7 @@ main_body:
ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t) {
define amdgpu_ps <4 x float> @load_2d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t) {
; VERDE-LABEL: load_2d_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
Expand Down Expand Up @@ -389,7 +389,7 @@ main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand Down Expand Up @@ -428,7 +428,7 @@ main_body:
ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %r) {
define amdgpu_ps <4 x float> @load_3d_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %r) {
; VERDE-LABEL: load_3d_tfe_lwe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
Expand Down Expand Up @@ -537,7 +537,7 @@ main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand Down Expand Up @@ -576,7 +576,7 @@ main_body:
ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) {
; VERDE-LABEL: load_cube_lwe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
Expand Down Expand Up @@ -685,7 +685,7 @@ main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand Down Expand Up @@ -724,7 +724,7 @@ main_body:
ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_1darray_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %slice) {
define amdgpu_ps <4 x float> @load_1darray_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %slice) {
; VERDE-LABEL: load_1darray_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
Expand Down Expand Up @@ -829,7 +829,7 @@ main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand Down Expand Up @@ -868,7 +868,7 @@ main_body:
ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice) {
; VERDE-LABEL: load_2darray_lwe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
Expand Down Expand Up @@ -977,7 +977,7 @@ main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand Down Expand Up @@ -1016,7 +1016,7 @@ main_body:
ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
; VERDE-LABEL: load_2dmsaa_both:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
Expand Down Expand Up @@ -1125,7 +1125,7 @@ main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand Down Expand Up @@ -1164,7 +1164,7 @@ main_body:
ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; VERDE-LABEL: load_2darraymsaa_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
Expand Down Expand Up @@ -1278,7 +1278,7 @@ main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand Down Expand Up @@ -1317,7 +1317,7 @@ main_body:
ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_mip_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %mip) {
define amdgpu_ps <4 x float> @load_mip_1d_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %mip) {
; VERDE-LABEL: load_mip_1d_lwe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
Expand Down Expand Up @@ -1422,7 +1422,7 @@ main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.1d.v4f32i32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 2, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand Down Expand Up @@ -1461,7 +1461,7 @@ main_body:
ret <4 x float> %v
}

define amdgpu_ps <4 x float> @load_mip_2d_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %mip) {
define amdgpu_ps <4 x float> @load_mip_2d_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %mip) {
; VERDE-LABEL: load_mip_2d_tfe:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v5, v0
Expand Down Expand Up @@ -1570,7 +1570,7 @@ main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.mip.2d.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand Down Expand Up @@ -1880,7 +1880,7 @@ main_body:
ret float %vv
}

define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask3(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask3(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
; VERDE-LABEL: load_1d_tfe_V4_dmask3:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v4, v0
Expand Down Expand Up @@ -1972,11 +1972,11 @@ main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 7, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask2(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask2(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
; VERDE-LABEL: load_1d_tfe_V4_dmask2:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v3, v0
Expand Down Expand Up @@ -2061,11 +2061,11 @@ main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 6, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
define amdgpu_ps <4 x float> @load_1d_tfe_V4_dmask1(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
; VERDE-LABEL: load_1d_tfe_V4_dmask1:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v2, v0
Expand Down Expand Up @@ -2142,11 +2142,11 @@ main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

define amdgpu_ps <2 x float> @load_1d_tfe_V2_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
define amdgpu_ps <2 x float> @load_1d_tfe_V2_dmask1(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s) {
; VERDE-LABEL: load_1d_tfe_V2_dmask1:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: v_mov_b32_e32 v2, v0
Expand Down Expand Up @@ -2223,7 +2223,7 @@ main_body:
%v = call {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue {<2 x float>, i32} %v, 0
%v.err = extractvalue {<2 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <2 x float> %v.vec
}

Expand Down Expand Up @@ -3779,7 +3779,7 @@ main_body:
ret void
}

define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, ptr addrspace(3) %lds, <2 x i32> %c) #0 {
; VERDE-LABEL: image_load_mmo:
; VERDE: ; %bb.0:
; VERDE-NEXT: image_load v1, v[1:2], s[0:7] dmask:0x1 unorm
Expand Down Expand Up @@ -3843,12 +3843,12 @@ define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)
; GFX11-NEXT: v_mov_b32_e32 v0, v1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: ; return to shader part epilog
store float 0.000000e+00, float addrspace(3)* %lds
store float 0.000000e+00, ptr addrspace(3) %lds
%c0 = extractelement <2 x i32> %c, i32 0
%c1 = extractelement <2 x i32> %c, i32 1
%tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
%tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
store float 0.000000e+00, float addrspace(3)* %tmp2
%tmp2 = getelementptr float, ptr addrspace(3) %lds, i32 4
store float 0.000000e+00, ptr addrspace(3) %tmp2
ret float %tex
}

Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ main_body:

; GCN-LABEL: {{^}}load_2dmsaa_both:
; GFX11: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x2 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ;
define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f32i32.i32(i32 2, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand All @@ -29,12 +29,12 @@ main_body:

; GCN-LABEL: {{^}}load_2darraymsaa_tfe:
; GFX11: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ;
define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f32i32.i32(i32 8, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand Down Expand Up @@ -72,12 +72,12 @@ main_body:

; GCN-LABEL: {{^}}load_2dmsaa_tfe_d16:
; GFX11: image_msaa_load v[0:2], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe d16 ;
define amdgpu_ps <4 x half> @load_2dmsaa_tfe_d16(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
define amdgpu_ps <4 x half> @load_2dmsaa_tfe_d16(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
%v = call {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2dmsaa.v4f16i32.i32(i32 1, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue {<4 x half>, i32} %v, 0
%v.err = extractvalue {<4 x half>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x half> %v.vec
}

Expand All @@ -91,12 +91,12 @@ main_body:

; GCN-LABEL: {{^}}load_2darraymsaa_tfe_d16:
; GFX11: image_msaa_load v[0:2], v[0:3], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe d16 ;
define amdgpu_ps <4 x half> @load_2darraymsaa_tfe_d16(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
define amdgpu_ps <4 x half> @load_2darraymsaa_tfe_d16(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
%v = call {<4 x half>,i32} @llvm.amdgcn.image.msaa.load.2darraymsaa.v4f16i32.i32(i32 1, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
%v.vec = extractvalue {<4 x half>, i32} %v, 0
%v.err = extractvalue {<4 x half>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x half> %v.vec
}

Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.msaa.load.x.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ main_body:

; GCN-LABEL: {{^}}load_2dmsaa_both:
; GFX10: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA unorm tfe lwe ;
; Calls the 2D-MSAA image.msaa.load.x intrinsic with a {<4 x float>, i32}
; result, stores the i32 member to %out and returns the <4 x float> member.
; The GFX10 check for this function expects `dmask:0xf ... tfe lwe`; the call
; passes 15 as its first operand and 3 as its second-to-last operand.
; NOTE(review): this text is a diff rendering, so the define and store lines
; appear twice: old typed-pointer form first, new opaque-pointer form second.
define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
define amdgpu_ps <4 x float> @load_2dmsaa_both(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
; Member 0 is the loaded vector, member 1 is the extra i32 result.
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand All @@ -29,56 +29,56 @@ main_body:

; GCN-LABEL: {{^}}load_2darraymsaa_tfe:
; GFX10: image_msaa_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe ;
; Calls the 2D-array-MSAA image.msaa.load.x intrinsic with a
; {<4 x float>, i32} result, stores the i32 member to %out and returns the
; <4 x float> member.
; The GFX10 check for this function expects `dmask:0xf ... tfe`; the call
; passes 15 as its first operand and 1 as its second-to-last operand.
; NOTE(review): this text is a diff rendering, so the define and store lines
; appear twice: old typed-pointer form first, new opaque-pointer form second.
define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
define amdgpu_ps <4 x float> @load_2darraymsaa_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2darraymsaa.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
; Member 0 is the loaded vector, member 1 is the extra i32 result.
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_2dmsaa_tfe_V4_dmask3:
; GFX10: image_msaa_load v[0:3], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe ;
; Calls the 2D-MSAA image.msaa.load.x intrinsic with a {<4 x float>, i32}
; result, stores the i32 member to %out and returns the <4 x float> member.
; The GFX10 check for this function expects a v[0:3] destination with
; `dmask:0x7 ... tfe`; the call passes 7 as its first operand and 1 as its
; second-to-last operand.
; NOTE(review): this text is a diff rendering, so the define and store lines
; appear twice: old typed-pointer form first, new opaque-pointer form second.
define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask3(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask3(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32 7, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
; Member 0 is the loaded vector, member 1 is the extra i32 result.
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_2dmsaa_tfe_V4_dmask2:
; GFX10: image_msaa_load v[0:2], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0x6 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe ;
; Calls the 2D-MSAA image.msaa.load.x intrinsic with a {<4 x float>, i32}
; result, stores the i32 member to %out and returns the <4 x float> member.
; The GFX10 check for this function expects a v[0:2] destination with
; `dmask:0x6 ... tfe`; the call passes 6 as its first operand and 1 as its
; second-to-last operand.
; NOTE(review): this text is a diff rendering, so the define and store lines
; appear twice: old typed-pointer form first, new opaque-pointer form second.
define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask2(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask2(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32 6, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
; Member 0 is the loaded vector, member 1 is the extra i32 result.
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_2dmsaa_tfe_V4_dmask1:
; GFX10: image_msaa_load v[0:1], [v4, v3, v2], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe ;
; Calls the 2D-MSAA image.msaa.load.x intrinsic with a {<4 x float>, i32}
; result, stores the i32 member to %out and returns the <4 x float> member.
; The GFX10 check for this function expects a v[0:1] destination with
; `dmask:0x8 ... tfe`; the call passes 8 as its first operand and 1 as its
; second-to-last operand.
; NOTE(review): this text is a diff rendering, so the define and store lines
; appear twice: old typed-pointer form first, new opaque-pointer form second.
define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
define amdgpu_ps <4 x float> @load_2dmsaa_tfe_V4_dmask1(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v4f32i32.i32(i32 8, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
; Member 0 is the loaded vector, member 1 is the extra i32 result.
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_2dmsaa_tfe_V2_dmask1:
; GFX10: image_msaa_load v[0:1], [v4, v3, v2], s[0:7] dmask:0x8 dim:SQ_RSRC_IMG_2D_MSAA unorm tfe ;
; Calls the 2D-MSAA image.msaa.load.x intrinsic in its <2 x float> variant
; ({<2 x float>, i32} result), stores the i32 member to %out and returns the
; <2 x float> member.
; The GFX10 check for this function expects a v[0:1] destination with
; `dmask:0x8 ... tfe`; the call passes 8 as its first operand and 1 as its
; second-to-last operand.
; NOTE(review): this text is a diff rendering, so the define and store lines
; appear twice: old typed-pointer form first, new opaque-pointer form second.
define amdgpu_ps <2 x float> @load_2dmsaa_tfe_V2_dmask1(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %fragid) {
define amdgpu_ps <2 x float> @load_2dmsaa_tfe_V2_dmask1(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %fragid) {
main_body:
%v = call {<2 x float>,i32} @llvm.amdgcn.image.msaa.load.x.2dmsaa.v2f32i32.i32(i32 8, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
; Member 0 is the loaded vector, member 1 is the extra i32 result.
%v.vec = extractvalue {<2 x float>, i32} %v, 0
%v.err = extractvalue {<2 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <2 x float> %v.vec
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.d16.dim.ll
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ main_body:
ret half %tex
}

define amdgpu_ps half @image_sample_2d_f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, i32 addrspace(1)* inreg %out) {
define amdgpu_ps half @image_sample_2d_f16_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, ptr addrspace(1) inreg %out) {
; TONGA-LABEL: image_sample_2d_f16_tfe:
; TONGA: ; %bb.0: ; %main_body
; TONGA-NEXT: s_mov_b64 s[14:15], exec
Expand Down Expand Up @@ -129,7 +129,7 @@ main_body:
%tex = call {half,i32} @llvm.amdgcn.image.sample.2d.f16i32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
%tex.vec = extractvalue {half, i32} %tex, 0
%tex.err = extractvalue {half, i32} %tex, 1
store i32 %tex.err, i32 addrspace(1)* %out, align 4
store i32 %tex.err, ptr addrspace(1) %out, align 4
ret half %tex.vec
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ main_body:
; GFX90A-LABEL: {{^}}sample_1d_lwe:
; GFX90A-NOT: s_wqm_b64
; GFX90A: image_sample v[{{[0-9:]+}}], v{{[0-9]+}}, s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf lwe
; Calls the 1D image.sample intrinsic with a {<4 x float>, i32} result, stores
; the i32 member to %out and returns the <4 x float> member.
; The GFX90A checks for this function expect no s_wqm_b64 and an image_sample
; with `dmask:0xf lwe`; the call passes 15 as its first operand and 2 as its
; second-to-last operand.
; NOTE(review): this text is a diff rendering, so the define and store lines
; appear twice: old typed-pointer form first, new opaque-pointer form second.
define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 addrspace(1)* inreg %out, float %s) {
define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
main_body:
%v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
; Member 0 is the sampled vector, member 1 is the extra i32 result.
%v.vec = extractvalue {<4 x float>, i32} %v, 0
%v.err = extractvalue {<4 x float>, i32} %v, 1
store i32 %v.err, i32 addrspace(1)* %out, align 4
store i32 %v.err, ptr addrspace(1) %out, align 4
ret <4 x float> %v.vec
}

Expand Down
Loading