324 changes: 162 additions & 162 deletions llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll

Large diffs are not rendered by default.

19 changes: 9 additions & 10 deletions llvm/test/CodeGen/AMDGPU/r600.private-memory.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,16 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
; Additional check in case the move ends up in the last slot
; R600-NOT: MOV * TO.X

; Kernel: fills a [2 x i32] private (addrspace 5) alloca with {0, 1}, loads the
; element selected by %in, adds the value returned by llvm.r600.read.tidig.x
; and stores the sum to %out.
; NOTE(review): the body appears twice, first with typed pointers (%0-%6) and
; then with ptr (%0-%5, where the alloca itself serves as element 0) - these
; look like the before/after spellings of one function; confirm which remains.
define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) {
define amdgpu_kernel void @work_item_info(ptr addrspace(1) %out, i32 %in) {
entry:
%0 = alloca [2 x i32], addrspace(5)
%1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0
%2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1
store i32 0, i32 addrspace(5)* %1
store i32 1, i32 addrspace(5)* %2
%3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in
%4 = load i32, i32 addrspace(5)* %3
%5 = call i32 @llvm.r600.read.tidig.x()
%6 = add i32 %4, %5
store i32 %6, i32 addrspace(1)* %out
%1 = getelementptr [2 x i32], ptr addrspace(5) %0, i32 0, i32 1
store i32 0, ptr addrspace(5) %0
store i32 1, ptr addrspace(5) %1
%2 = getelementptr [2 x i32], ptr addrspace(5) %0, i32 0, i32 %in
%3 = load i32, ptr addrspace(5) %2
%4 = call i32 @llvm.r600.read.tidig.x()
%5 = add i32 %3, %4
store i32 %5, ptr addrspace(1) %out
ret void
}
120 changes: 60 additions & 60 deletions llvm/test/CodeGen/AMDGPU/r600.sub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,48 +3,48 @@
declare i32 @llvm.r600.read.tidig.x() readnone

; FUNC-LABEL: {{^}}s_sub_i32:
; Kernel: stores the i32 difference of the two kernel arguments (%a - %b) to %out.
define amdgpu_kernel void @s_sub_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
define amdgpu_kernel void @s_sub_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
%result = sub i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
store i32 %result, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}s_sub_imm_i32:
; Kernel: stores 1234 - %a to %out (constant minuend, kernel-argument subtrahend).
define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) {
define amdgpu_kernel void @s_sub_imm_i32(ptr addrspace(1) %out, i32 %a) {
%result = sub i32 1234, %a
store i32 %result, i32 addrspace(1)* %out
store i32 %result, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}test_sub_i32:
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; Kernel: loads the i32 at %in (element 0) and the i32 right after it
; (element 1), then stores element0 - element1 to %out.
define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%a = load i32, i32 addrspace(1)* %in
%b = load i32, i32 addrspace(1)* %b_ptr
define amdgpu_kernel void @test_sub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1
%a = load i32, ptr addrspace(1) %in
%b = load i32, ptr addrspace(1) %b_ptr
%result = sub i32 %a, %b
store i32 %result, i32 addrspace(1)* %out
store i32 %result, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}test_sub_imm_i32:
; EG: SUB_INT
; Kernel: loads one i32 from %in and stores 123 minus that value to %out.
define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%a = load i32, i32 addrspace(1)* %in
define amdgpu_kernel void @test_sub_imm_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%a = load i32, ptr addrspace(1) %in
%result = sub i32 123, %a
store i32 %result, i32 addrspace(1)* %out
store i32 %result, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}test_sub_v2i32:
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; Kernel: loads two consecutive <2 x i32> vectors from %in and stores their
; lane-wise difference (first minus second) to %out.
define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
%a = load <2 x i32>, <2 x i32> addrspace(1) * %in
%b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
define amdgpu_kernel void @test_sub_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
%a = load <2 x i32>, ptr addrspace(1) %in
%b = load <2 x i32>, ptr addrspace(1) %b_ptr
%result = sub <2 x i32> %a, %b
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
store <2 x i32> %result, ptr addrspace(1) %out
ret void
}

Expand All @@ -53,48 +53,48 @@ define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; Kernel: loads two consecutive <4 x i32> vectors from %in and stores their
; lane-wise difference (first minus second) to %out.
define amdgpu_kernel void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32>, <4 x i32> addrspace(1) * %in
%b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
define amdgpu_kernel void @test_sub_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
%a = load <4 x i32>, ptr addrspace(1) %in
%b = load <4 x i32>, ptr addrspace(1) %b_ptr
%result = sub <4 x i32> %a, %b
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
store <4 x i32> %result, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}test_sub_i16:
; Kernel: indexes %in by the work-item id (llvm.r600.read.tidig.x), does two
; volatile i16 loads (that element and the next one) and stores their
; difference to %out. The store address is %out itself, not indexed by tid.
define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
define amdgpu_kernel void @test_sub_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%tid = call i32 @llvm.r600.read.tidig.x()
%gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
%b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i32 1
%a = load volatile i16, i16 addrspace(1)* %gep
%b = load volatile i16, i16 addrspace(1)* %b_ptr
%gep = getelementptr i16, ptr addrspace(1) %in, i32 %tid
%b_ptr = getelementptr i16, ptr addrspace(1) %gep, i32 1
%a = load volatile i16, ptr addrspace(1) %gep
%b = load volatile i16, ptr addrspace(1) %b_ptr
%result = sub i16 %a, %b
store i16 %result, i16 addrspace(1)* %out
store i16 %result, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}test_sub_v2i16:
; Kernel: loads the <2 x i16> at index tid of %in and the vector following it
; (second GEP uses an i16-typed index of 1), then stores first minus second
; to %out. Unlike the scalar i16 variant, these loads are not volatile.
define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
define amdgpu_kernel void @test_sub_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%tid = call i32 @llvm.r600.read.tidig.x()
%gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
%b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1
%a = load <2 x i16>, <2 x i16> addrspace(1)* %gep
%b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
%gep = getelementptr <2 x i16>, ptr addrspace(1) %in, i32 %tid
%b_ptr = getelementptr <2 x i16>, ptr addrspace(1) %gep, i16 1
%a = load <2 x i16>, ptr addrspace(1) %gep
%b = load <2 x i16>, ptr addrspace(1) %b_ptr
%result = sub <2 x i16> %a, %b
store <2 x i16> %result, <2 x i16> addrspace(1)* %out
store <2 x i16> %result, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}test_sub_v4i16:
; Kernel: loads the <4 x i16> at index tid of %in and the vector following it
; (second GEP uses an i16-typed index of 1), then stores first minus second
; to %out.
define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
define amdgpu_kernel void @test_sub_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%tid = call i32 @llvm.r600.read.tidig.x()
%gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
%b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1
%a = load <4 x i16>, <4 x i16> addrspace(1) * %gep
%b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr
%gep = getelementptr <4 x i16>, ptr addrspace(1) %in, i32 %tid
%b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %gep, i16 1
%a = load <4 x i16>, ptr addrspace(1) %gep
%b = load <4 x i16>, ptr addrspace(1) %b_ptr
%result = sub <4 x i16> %a, %b
store <4 x i16> %result, <4 x i16> addrspace(1)* %out
store <4 x i16> %result, ptr addrspace(1) %out
ret void
}

Expand All @@ -104,9 +104,9 @@ define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16
; EG-DAG: SUBB_UINT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT {{[* ]*}}
; Kernel: stores the i64 difference of the two kernel arguments (%a - %b) to
; the noalias pointer %out with 8-byte alignment.
define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind {
define amdgpu_kernel void @s_sub_i64(ptr addrspace(1) noalias %out, i64 %a, i64 %b) nounwind {
%result = sub i64 %a, %b
store i64 %result, i64 addrspace(1)* %out, align 8
store i64 %result, ptr addrspace(1) %out, align 8
ret void
}

Expand All @@ -116,37 +116,37 @@ define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64
; EG-DAG: SUBB_UINT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT {{[* ]*}}
; Kernel: each work-item loads one i64 from %inA and one from %inB at index
; tid (llvm.r600.read.tidig.x) and stores a - b to %out with align 8.
; The store address is %out itself; it is not indexed by tid.
define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind {
define amdgpu_kernel void @v_sub_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) nounwind {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
%a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid
%b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid
%a = load i64, i64 addrspace(1)* %a_ptr
%b = load i64, i64 addrspace(1)* %b_ptr
%a_ptr = getelementptr i64, ptr addrspace(1) %inA, i32 %tid
%b_ptr = getelementptr i64, ptr addrspace(1) %inB, i32 %tid
%a = load i64, ptr addrspace(1) %a_ptr
%b = load i64, ptr addrspace(1) %b_ptr
%result = sub i64 %a, %b
store i64 %result, i64 addrspace(1)* %out, align 8
store i64 %result, ptr addrspace(1) %out, align 8
ret void
}

; FUNC-LABEL: {{^}}v_test_sub_v2i64:
; Kernel: each work-item loads one <2 x i64> from %inA and one from %inB at
; index tid and stores the lane-wise difference a - b to %out (not indexed).
define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) {
define amdgpu_kernel void @v_test_sub_v2i64(ptr addrspace(1) %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
%a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid
%b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid
%a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr
%b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
%a_ptr = getelementptr <2 x i64>, ptr addrspace(1) %inA, i32 %tid
%b_ptr = getelementptr <2 x i64>, ptr addrspace(1) %inB, i32 %tid
%a = load <2 x i64>, ptr addrspace(1) %a_ptr
%b = load <2 x i64>, ptr addrspace(1) %b_ptr
%result = sub <2 x i64> %a, %b
store <2 x i64> %result, <2 x i64> addrspace(1)* %out
store <2 x i64> %result, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}v_test_sub_v4i64:
; Kernel: each work-item loads one <4 x i64> from %inA and one from %inB at
; index tid and stores the lane-wise difference a - b to %out (not indexed).
define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) {
define amdgpu_kernel void @v_test_sub_v4i64(ptr addrspace(1) %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) {
%tid = call i32 @llvm.r600.read.tidig.x() readnone
%a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid
%b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid
%a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr
%b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
%a_ptr = getelementptr <4 x i64>, ptr addrspace(1) %inA, i32 %tid
%b_ptr = getelementptr <4 x i64>, ptr addrspace(1) %inB, i32 %tid
%a = load <4 x i64>, ptr addrspace(1) %a_ptr
%b = load <4 x i64>, ptr addrspace(1) %b_ptr
%result = sub <4 x i64> %a, %b
store <4 x i64> %result, <4 x i64> addrspace(1)* %out
store <4 x i64> %result, ptr addrspace(1) %out
ret void
}
48 changes: 23 additions & 25 deletions llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,59 +3,59 @@

; FUNC-LABEL: {{^}}tgid_x:
; EG: MEM_RAT_CACHELESS STORE_RAW T1.X
; Kernel: stores the i32 returned by llvm.r600.read.tgid.x to %out.
define amdgpu_kernel void @tgid_x(i32 addrspace(1)* %out) {
define amdgpu_kernel void @tgid_x(ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.x() #0
store i32 %0, i32 addrspace(1)* %out
store i32 %0, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}tgid_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X
; EG: MOV [[REG]].X, T1.Y
; Kernel: stores the i32 returned by llvm.r600.read.tgid.y to %out.
define amdgpu_kernel void @tgid_y(i32 addrspace(1)* %out) {
define amdgpu_kernel void @tgid_y(ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.y() #0
store i32 %0, i32 addrspace(1)* %out
store i32 %0, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}tgid_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X
; EG: MOV [[REG]].X, T1.Z
; Kernel: stores the i32 returned by llvm.r600.read.tgid.z to %out.
define amdgpu_kernel void @tgid_z(i32 addrspace(1)* %out) {
define amdgpu_kernel void @tgid_z(ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.r600.read.tgid.z() #0
store i32 %0, i32 addrspace(1)* %out
store i32 %0, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}tidig_x:
; EG: MEM_RAT_CACHELESS STORE_RAW T0.X
; Kernel: stores the i32 returned by llvm.r600.read.tidig.x to %out.
define amdgpu_kernel void @tidig_x(i32 addrspace(1)* %out) {
define amdgpu_kernel void @tidig_x(ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.x() #0
store i32 %0, i32 addrspace(1)* %out
store i32 %0, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}tidig_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X
; EG: MOV [[REG]].X, T0.Y
; Kernel: stores the i32 returned by llvm.r600.read.tidig.y to %out.
define amdgpu_kernel void @tidig_y(i32 addrspace(1)* %out) {
define amdgpu_kernel void @tidig_y(ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.y() #0
store i32 %0, i32 addrspace(1)* %out
store i32 %0, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}tidig_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X
; EG: MOV [[REG]].X, T0.Z
; Kernel: stores the i32 returned by llvm.r600.read.tidig.z to %out.
define amdgpu_kernel void @tidig_z(i32 addrspace(1)* %out) {
define amdgpu_kernel void @tidig_z(ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.r600.read.tidig.z() #0
store i32 %0, i32 addrspace(1)* %out
store i32 %0, ptr addrspace(1) %out
ret void
}

Expand All @@ -65,28 +65,26 @@ entry:
; EG-NOT: VTX_READ
; EG-DAG: MOV {{\*?}} [[VAL]], KC0[3].Z
; EG-DAG: LSHR {{\*? *}}[[PTR]], KC0[2].Y, literal
; Kernel: takes the addrspace(7) pointer returned by
; llvm.r600.implicitarg.ptr, loads the i32 at constant element index 4 from it
; and stores that value to %out.
; The typed-pointer spelling needs a bitcast from i8* to i32* (%header.ptr);
; the ptr spelling indexes %implicitarg.ptr directly, so the bitcast is gone.
define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
%implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
%header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
%gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 4
%value = load i32, i32 addrspace(7)* %gep
store i32 %value, i32 addrspace(1)* %out
define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 {
%implicitarg.ptr = call noalias ptr addrspace(7) @llvm.r600.implicitarg.ptr()
%gep = getelementptr i32, ptr addrspace(7) %implicitarg.ptr, i32 4
%value = load i32, ptr addrspace(7) %gep
store i32 %value, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}test_implicit_dyn:
; 36 prepended implicit bytes + 8 (4-byte out pointer + 4-byte in) = 44
; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 44, #3
; Kernel: same access as the constant-index variant, but the i32 element index
; into the llvm.r600.implicitarg.ptr result is the run-time argument %in; the
; loaded value is stored to %out.
; The typed-pointer spelling goes through a bitcast (%header.ptr); the ptr
; spelling indexes %implicitarg.ptr directly.
define amdgpu_kernel void @test_implicit_dyn(i32 addrspace(1)* %out, i32 %in) #1 {
%implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr()
%header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)*
%gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 %in
%value = load i32, i32 addrspace(7)* %gep
store i32 %value, i32 addrspace(1)* %out
define amdgpu_kernel void @test_implicit_dyn(ptr addrspace(1) %out, i32 %in) #1 {
%implicitarg.ptr = call noalias ptr addrspace(7) @llvm.r600.implicitarg.ptr()
%gep = getelementptr i32, ptr addrspace(7) %implicitarg.ptr, i32 %in
%value = load i32, ptr addrspace(7) %gep
store i32 %value, ptr addrspace(1) %out
ret void
}

declare i8 addrspace(7)* @llvm.r600.implicitarg.ptr() #0
declare ptr addrspace(7) @llvm.r600.implicitarg.ptr() #0

declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/r600cfg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ ELSE45: ; preds = %ENDIF40
ENDIF43: ; preds = %ELSE45, %IF44
%.sink = phi i32 [ %49, %IF44 ], [ %51, %ELSE45 ]
%52 = bitcast i32 %.sink to float
%53 = load <4 x float>, <4 x float> addrspace(8)* null
%53 = load <4 x float>, ptr addrspace(8) null
%54 = extractelement <4 x float> %53, i32 0
%55 = bitcast float %54 to i32
br label %LOOP47
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
; EG: MOV [[VAL]], literal.x
; EG-NEXT: LSHR
; EG-NEXT: 0(
; Kernel: passes its first argument (%in0) to
; llvm.OpenCL.sampler.get.resource.id and stores the returned i32 to %out.
define amdgpu_kernel void @test_0(i32 %in0, i32 addrspace(1)* %out) {
define amdgpu_kernel void @test_0(i32 %in0, ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in0) #0
store i32 %0, i32 addrspace(1)* %out
store i32 %0, ptr addrspace(1) %out
ret void
}

Expand All @@ -17,10 +17,10 @@ entry:
; EG: MOV [[VAL]], literal.x
; EG-NEXT: LSHR
; EG-NEXT: 1(
; Kernel: passes its second argument (%in1) to
; llvm.OpenCL.sampler.get.resource.id and stores the returned i32 to %out;
; %in0 is unused in the body.
define amdgpu_kernel void @test_1(i32 %in0, i32 %in1, i32 addrspace(1)* %out) {
define amdgpu_kernel void @test_1(i32 %in0, i32 %in1, ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in1) #0
store i32 %0, i32 addrspace(1)* %out
store i32 %0, ptr addrspace(1) %out
ret void
}

Expand All @@ -29,10 +29,10 @@ entry:
; EG: MOV [[VAL]], literal.x
; EG-NEXT: LSHR
; EG-NEXT: 2(
; Kernel: passes its third argument (%in2) to
; llvm.OpenCL.sampler.get.resource.id and stores the returned i32 to %out;
; %in0 and %in1 are unused in the body.
define amdgpu_kernel void @test_2(i32 %in0, i32 %in1, i32 %in2, i32 addrspace(1)* %out) {
define amdgpu_kernel void @test_2(i32 %in0, i32 %in1, i32 %in2, ptr addrspace(1) %out) {
entry:
%0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in2) #0
store i32 %0, i32 addrspace(1)* %out
store i32 %0, ptr addrspace(1) %out
ret void
}

Expand All @@ -43,21 +43,21 @@ attributes #0 = { readnone }

!opencl.kernels = !{!0, !1, !2}

!0 = !{void (i32, i32 addrspace(1)*)* @test_0, !10, !20, !30, !40, !50}
!0 = !{ptr @test_0, !10, !20, !30, !40, !50}
!10 = !{!"kernel_arg_addr_space", i32 0, i32 1}
!20 = !{!"kernel_arg_access_qual", !"none", !"none"}
!30 = !{!"kernel_arg_type", !"sampler_t", !"int*"}
!40 = !{!"kernel_arg_base_type", !"sampler_t", !"int*"}
!50 = !{!"kernel_arg_type_qual", !"", !""}

!1 = !{void (i32, i32, i32 addrspace(1)*)* @test_1, !11, !21, !31, !41, !51}
!1 = !{ptr @test_1, !11, !21, !31, !41, !51}
!11 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 1}
!21 = !{!"kernel_arg_access_qual", !"none", !"none", !"none"}
!31 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"int*"}
!41 = !{!"kernel_arg_base_type", !"sampler_t", !"sampler_t", !"int*"}
!51 = !{!"kernel_arg_type_qual", !"", !"", !""}

!2 = !{void (i32, i32, i32, i32 addrspace(1)*)* @test_2, !12, !22, !32, !42, !52}
!2 = !{ptr @test_2, !12, !22, !32, !42, !52}
!12 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 0, i32 1}
!22 = !{!"kernel_arg_access_qual", !"none", !"none", !"none", !"none"}
!32 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"sampler_t", !"int*"}
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
; Kernel: stores the signed 64-bit quotient %x / %y of the two kernel
; arguments to %out.
define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
%result = sdiv i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
store i64 %result, ptr addrspace(1) %out
ret void
}

Expand Down Expand Up @@ -72,31 +72,31 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG: BFE_UINT
;EG: BFE_UINT
;EG: AND_INT {{.*}}, 1,
; Kernel: stores a 64-bit remainder of the two kernel arguments to %out.
; NOTE(review): despite the "srem" name, the body computes an UNSIGNED
; remainder (urem), so the signed-remainder path is presumably not exercised
; here. Switching to srem may change the generated code that the expectation
; lines above this function match - verify them before changing the opcode.
define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
%result = urem i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
store i64 %result, ptr addrspace(1) %out
ret void
}

;EG-LABEL: {{^}}test_sdiv3264:
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
; Kernel: signed 64-bit division of narrowed operands. Both arguments are
; arithmetic-shifted right by 33 first, so each operand is a sign-extended
; value with at most 31 significant bits; the quotient is stored to %out.
define amdgpu_kernel void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
define amdgpu_kernel void @test_sdiv3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 33
%2 = ashr i64 %y, 33
%result = sdiv i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
store i64 %result, ptr addrspace(1) %out
ret void
}

;EG-LABEL: {{^}}test_srem3264:
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
; Kernel: signed 64-bit remainder of narrowed operands. Both arguments are
; arithmetic-shifted right by 33 first, so each operand is a sign-extended
; value with at most 31 significant bits; the remainder is stored to %out.
define amdgpu_kernel void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
define amdgpu_kernel void @test_srem3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 33
%2 = ashr i64 %y, 33
%result = srem i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
store i64 %result, ptr addrspace(1) %out
ret void
}

Expand All @@ -106,11 +106,11 @@ define amdgpu_kernel void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y)
;EG: FLT_TO_INT
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
; Kernel: signed 64-bit division of narrowed operands. Both arguments are
; arithmetic-shifted right by 40 first, so each operand is a sign-extended
; value with at most 24 significant bits; the quotient is stored to %out.
define amdgpu_kernel void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
define amdgpu_kernel void @test_sdiv2464(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 40
%2 = ashr i64 %y, 40
%result = sdiv i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
store i64 %result, ptr addrspace(1) %out
ret void
}

Expand All @@ -120,10 +120,10 @@ define amdgpu_kernel void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y)
;EG: FLT_TO_INT
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
; Kernel: signed 64-bit remainder of narrowed operands. Both arguments are
; arithmetic-shifted right by 40 first, so each operand is a sign-extended
; value with at most 24 significant bits; the remainder is stored to %out.
define amdgpu_kernel void @test_srem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
define amdgpu_kernel void @test_srem2464(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = ashr i64 %x, 40
%2 = ashr i64 %y, 40
%result = srem i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
store i64 %result, ptr addrspace(1) %out
ret void
}
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AMDGPU/set-dx10.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,154 +8,154 @@
; CHECK: LSHR
; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
; Kernel: compares %in with 5.0 using fcmp une, selects 1.0 or 0.0, negates
; the selected float and converts it to i32, so %out receives -1 when the
; comparison holds and 0 otherwise.
define amdgpu_kernel void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
define amdgpu_kernel void @fcmp_une_select_fptosi(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp une float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fneg float %1
%3 = fptosi float %2 to i32
store i32 %3, i32 addrspace(1)* %out
store i32 %3, ptr addrspace(1) %out
ret void
}

; CHECK: {{^}}fcmp_une_select_i32:
; CHECK: LSHR
; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
; Kernel: compares %in with 5.0 using fcmp une and stores the integer select
; result to %out: -1 when the comparison holds, 0 otherwise.
define amdgpu_kernel void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
define amdgpu_kernel void @fcmp_une_select_i32(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp une float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
store i32 %1, i32 addrspace(1)* %out
store i32 %1, ptr addrspace(1) %out
ret void
}

; CHECK: {{^}}fcmp_oeq_select_fptosi:
; CHECK: LSHR
; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
; Kernel: compares %in with 5.0 using fcmp oeq, selects 1.0 or 0.0, negates
; the selected float and converts it to i32, so %out receives -1 when the
; comparison holds and 0 otherwise.
define amdgpu_kernel void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
define amdgpu_kernel void @fcmp_oeq_select_fptosi(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp oeq float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fneg float %1
%3 = fptosi float %2 to i32
store i32 %3, i32 addrspace(1)* %out
store i32 %3, ptr addrspace(1) %out
ret void
}

; CHECK: {{^}}fcmp_oeq_select_i32:
; CHECK: LSHR
; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
; Kernel: compares %in with 5.0 using fcmp oeq and stores the integer select
; result to %out: -1 when the comparison holds, 0 otherwise.
define amdgpu_kernel void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
define amdgpu_kernel void @fcmp_oeq_select_i32(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp oeq float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
store i32 %1, i32 addrspace(1)* %out
store i32 %1, ptr addrspace(1) %out
ret void
}

; CHECK: {{^}}fcmp_ogt_select_fptosi:
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
; Kernel: compares %in with 5.0 using fcmp ogt, selects 1.0 or 0.0, negates
; the selected float and converts it to i32, so %out receives -1 when the
; comparison holds and 0 otherwise.
define amdgpu_kernel void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
define amdgpu_kernel void @fcmp_ogt_select_fptosi(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp ogt float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fneg float %1
%3 = fptosi float %2 to i32
store i32 %3, i32 addrspace(1)* %out
store i32 %3, ptr addrspace(1) %out
ret void
}

; CHECK: {{^}}fcmp_ogt_select_i32:
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
; Kernel: compares %in with 5.0 using fcmp ogt and stores the integer select
; result to %out: -1 when the comparison holds, 0 otherwise.
define amdgpu_kernel void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
define amdgpu_kernel void @fcmp_ogt_select_i32(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp ogt float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
store i32 %1, i32 addrspace(1)* %out
store i32 %1, ptr addrspace(1) %out
ret void
}

; CHECK: {{^}}fcmp_oge_select_fptosi:
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
; Kernel: compares %in with 5.0 using fcmp oge, selects 1.0 or 0.0, negates
; the selected float and converts it to i32, so %out receives -1 when the
; comparison holds and 0 otherwise.
define amdgpu_kernel void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
define amdgpu_kernel void @fcmp_oge_select_fptosi(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp oge float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fneg float %1
%3 = fptosi float %2 to i32
store i32 %3, i32 addrspace(1)* %out
store i32 %3, ptr addrspace(1) %out
ret void
}

; CHECK: {{^}}fcmp_oge_select_i32:
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
; CHECK-NEXT: 1084227584(5.000000e+00)
; Kernel: compares %in with 5.0 using fcmp oge and stores the integer select
; result to %out: -1 when the comparison holds, 0 otherwise.
define amdgpu_kernel void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
define amdgpu_kernel void @fcmp_oge_select_i32(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp oge float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
store i32 %1, i32 addrspace(1)* %out
store i32 %1, ptr addrspace(1) %out
ret void
}

; CHECK: {{^}}fcmp_ole_select_fptosi:
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
; Kernel: compares %in with 5.0 using fcmp ole, selects 1.0 or 0.0, negates
; the selected float and converts it to i32, so %out receives -1 when the
; comparison holds and 0 otherwise.
define amdgpu_kernel void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
define amdgpu_kernel void @fcmp_ole_select_fptosi(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp ole float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fneg float %1
%3 = fptosi float %2 to i32
store i32 %3, i32 addrspace(1)* %out
store i32 %3, ptr addrspace(1) %out
ret void
}

; CHECK: {{^}}fcmp_ole_select_i32:
; CHECK: LSHR
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
; Kernel: compares %in with 5.0 using fcmp ole and stores the integer select
; result to %out: -1 when the comparison holds, 0 otherwise.
define amdgpu_kernel void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
define amdgpu_kernel void @fcmp_ole_select_i32(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp ole float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
store i32 %1, i32 addrspace(1)* %out
store i32 %1, ptr addrspace(1) %out
ret void
}

; CHECK: {{^}}fcmp_olt_select_fptosi:
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
; Kernel: compares %in with 5.0 using fcmp olt, selects 1.0 or 0.0, negates
; the selected float and converts it to i32, so %out receives -1 when the
; comparison holds and 0 otherwise.
define amdgpu_kernel void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
define amdgpu_kernel void @fcmp_olt_select_fptosi(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp olt float %in, 5.0
%1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
%2 = fneg float %1
%3 = fptosi float %2 to i32
store i32 %3, i32 addrspace(1)* %out
store i32 %3, ptr addrspace(1) %out
ret void
}

; CHECK: {{^}}fcmp_olt_select_i32:
; CHECK: LSHR
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
; CHECK-NEXT: 1084227584(5.000000e+00)
; Kernel: compares %in with 5.0 using fcmp olt and stores the integer select
; result to %out: -1 when the comparison holds, 0 otherwise.
define amdgpu_kernel void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
define amdgpu_kernel void @fcmp_olt_select_i32(ptr addrspace(1) %out, float %in) {
entry:
%0 = fcmp olt float %in, 5.0
%1 = select i1 %0, i32 -1, i32 0
store i32 %1, i32 addrspace(1)* %out
store i32 %1, ptr addrspace(1) %out
ret void
}
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@
; EG: LSHR {{\*?}} [[ADDR]]

; Works with the align 2 removed
; Kernel: adds the two <2 x i32> arguments, shifts each lane left by 6 and
; then arithmetic-shifts it right by 7, and stores the vector to %out.
; The store is deliberately under-aligned (align 2); per the remark above this
; function, the case behaves differently once that alignment is removed.
define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) nounwind {
%c = add <2 x i32> %a, %b
%x = shl <2 x i32> %c, <i32 6, i32 6>
%y = ashr <2 x i32> %x, <i32 7, i32 7>
store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
store <2 x i32> %y, ptr addrspace(1) %out, align 2
ret void
}
42 changes: 21 additions & 21 deletions llvm/test/CodeGen/AMDGPU/swizzle-export.ll
Original file line number Diff line number Diff line change
Expand Up @@ -12,56 +12,56 @@ main_body:
%1 = extractelement <4 x float> %reg1, i32 1
%2 = extractelement <4 x float> %reg1, i32 2
%3 = extractelement <4 x float> %reg1, i32 3
%4 = load <4 x float>, <4 x float> addrspace(8)* null
%4 = load <4 x float>, ptr addrspace(8) null
%5 = extractelement <4 x float> %4, i32 1
%6 = load <4 x float>, <4 x float> addrspace(8)* null
%6 = load <4 x float>, ptr addrspace(8) null
%7 = extractelement <4 x float> %6, i32 2
%8 = load <4 x float>, <4 x float> addrspace(8)* null
%8 = load <4 x float>, ptr addrspace(8) null
%9 = extractelement <4 x float> %8, i32 0
%10 = fmul float 0.000000e+00, %9
%11 = load <4 x float>, <4 x float> addrspace(8)* null
%11 = load <4 x float>, ptr addrspace(8) null
%12 = extractelement <4 x float> %11, i32 0
%13 = fmul float %5, %12
%14 = load <4 x float>, <4 x float> addrspace(8)* null
%14 = load <4 x float>, ptr addrspace(8) null
%15 = extractelement <4 x float> %14, i32 0
%16 = fmul float 0.000000e+00, %15
%17 = load <4 x float>, <4 x float> addrspace(8)* null
%17 = load <4 x float>, ptr addrspace(8) null
%18 = extractelement <4 x float> %17, i32 0
%19 = fmul float 0.000000e+00, %18
%20 = load <4 x float>, <4 x float> addrspace(8)* null
%20 = load <4 x float>, ptr addrspace(8) null
%21 = extractelement <4 x float> %20, i32 0
%22 = fmul float %7, %21
%23 = load <4 x float>, <4 x float> addrspace(8)* null
%23 = load <4 x float>, ptr addrspace(8) null
%24 = extractelement <4 x float> %23, i32 0
%25 = fmul float 0.000000e+00, %24
%26 = load <4 x float>, <4 x float> addrspace(8)* null
%26 = load <4 x float>, ptr addrspace(8) null
%27 = extractelement <4 x float> %26, i32 0
%28 = fmul float 0.000000e+00, %27
%29 = load <4 x float>, <4 x float> addrspace(8)* null
%29 = load <4 x float>, ptr addrspace(8) null
%30 = extractelement <4 x float> %29, i32 0
%31 = fmul float 0.000000e+00, %30
%32 = load <4 x float>, <4 x float> addrspace(8)* null
%32 = load <4 x float>, ptr addrspace(8) null
%33 = extractelement <4 x float> %32, i32 0
%34 = fmul float 0.000000e+00, %33
%35 = load <4 x float>, <4 x float> addrspace(8)* null
%35 = load <4 x float>, ptr addrspace(8) null
%36 = extractelement <4 x float> %35, i32 0
%37 = fmul float 0.000000e+00, %36
%38 = load <4 x float>, <4 x float> addrspace(8)* null
%38 = load <4 x float>, ptr addrspace(8) null
%39 = extractelement <4 x float> %38, i32 0
%40 = fmul float 1.000000e+00, %39
%41 = load <4 x float>, <4 x float> addrspace(8)* null
%41 = load <4 x float>, ptr addrspace(8) null
%42 = extractelement <4 x float> %41, i32 0
%43 = fmul float 0.000000e+00, %42
%44 = load <4 x float>, <4 x float> addrspace(8)* null
%44 = load <4 x float>, ptr addrspace(8) null
%45 = extractelement <4 x float> %44, i32 0
%46 = fmul float 0.000000e+00, %45
%47 = load <4 x float>, <4 x float> addrspace(8)* null
%47 = load <4 x float>, ptr addrspace(8) null
%48 = extractelement <4 x float> %47, i32 0
%49 = fmul float 0.000000e+00, %48
%50 = load <4 x float>, <4 x float> addrspace(8)* null
%50 = load <4 x float>, ptr addrspace(8) null
%51 = extractelement <4 x float> %50, i32 0
%52 = fmul float 0.000000e+00, %51
%53 = load <4 x float>, <4 x float> addrspace(8)* null
%53 = load <4 x float>, ptr addrspace(8) null
%54 = extractelement <4 x float> %53, i32 0
%55 = fmul float 1.000000e+00, %54
%56 = insertelement <4 x float> undef, float %0, i32 0
Expand Down Expand Up @@ -102,12 +102,12 @@ main_body:
%1 = extractelement <4 x float> %reg1, i32 1
%2 = fadd float %0, 2.5
%3 = fmul float %1, 3.5
%4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
%4 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
%5 = extractelement <4 x float> %4, i32 0
%6 = call float @llvm.cos.f32(float %5)
%7 = load <4 x float>, <4 x float> addrspace(8)* null
%7 = load <4 x float>, ptr addrspace(8) null
%8 = extractelement <4 x float> %7, i32 0
%9 = load <4 x float>, <4 x float> addrspace(8)* null
%9 = load <4 x float>, ptr addrspace(8) null
%10 = extractelement <4 x float> %9, i32 1
%11 = insertelement <4 x float> undef, float %2, i32 0
%12 = insertelement <4 x float> %11, float %3, i32 1
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AMDGPU/udivrem64.r600.ll
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@
;EG: BFE_UINT
;EG: BFE_UINT
;EG: BFE_UINT
; Kernel: stores the unsigned 64-bit quotient %x / %y of the two kernel
; arguments to %out.
define amdgpu_kernel void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
define amdgpu_kernel void @test_udiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
%result = udiv i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
store i64 %result, ptr addrspace(1) %out
ret void
}

Expand Down Expand Up @@ -72,31 +72,31 @@ define amdgpu_kernel void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
;EG: BFE_UINT
;EG: BFE_UINT
;EG: AND_INT {{.*}}, 1,
; Kernel: stores the unsigned 64-bit remainder %x % %y of the two kernel
; arguments to %out.
define amdgpu_kernel void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
define amdgpu_kernel void @test_urem(ptr addrspace(1) %out, i64 %x, i64 %y) {
%result = urem i64 %x, %y
store i64 %result, i64 addrspace(1)* %out
store i64 %result, ptr addrspace(1) %out
ret void
}

;EG-LABEL: {{^}}test_udiv3264:
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
; Kernel: unsigned 64-bit division of narrowed operands. Both arguments are
; logically shifted right by 33 first, so each operand fits in 31 bits; the
; quotient is stored to %out.
define amdgpu_kernel void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
define amdgpu_kernel void @test_udiv3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 33
%2 = lshr i64 %y, 33
%result = udiv i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
store i64 %result, ptr addrspace(1) %out
ret void
}

;EG-LABEL: {{^}}test_urem3264:
;EG: RECIP_UINT
;EG-NOT: BFE_UINT
; Kernel: unsigned 64-bit remainder of narrowed operands. Both arguments are
; logically shifted right by 33 first, so each operand fits in 31 bits; the
; remainder is stored to %out.
define amdgpu_kernel void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
define amdgpu_kernel void @test_urem3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 33
%2 = lshr i64 %y, 33
%result = urem i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
store i64 %result, ptr addrspace(1) %out
ret void
}

Expand All @@ -106,11 +106,11 @@ define amdgpu_kernel void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y)
;EG: FLT_TO_UINT
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
; Kernel: unsigned 64-bit division of narrowed operands. Both arguments are
; logically shifted right by 41 first, so each operand fits in 23 bits; the
; quotient is stored to %out.
define amdgpu_kernel void @test_udiv2364(i64 addrspace(1)* %out, i64 %x, i64 %y) {
define amdgpu_kernel void @test_udiv2364(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 41
%2 = lshr i64 %y, 41
%result = udiv i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
store i64 %result, ptr addrspace(1) %out
ret void
}

Expand All @@ -120,17 +120,17 @@ define amdgpu_kernel void @test_udiv2364(i64 addrspace(1)* %out, i64 %x, i64 %y)
;EG: FLT_TO_UINT
;EG-NOT: RECIP_UINT
;EG-NOT: BFE_UINT
; Kernel: unsigned 64-bit remainder of narrowed operands. Both arguments are
; logically shifted right by 41 first, so each operand fits in 23 bits; the
; remainder is stored to %out.
define amdgpu_kernel void @test_urem2364(i64 addrspace(1)* %out, i64 %x, i64 %y) {
define amdgpu_kernel void @test_urem2364(ptr addrspace(1) %out, i64 %x, i64 %y) {
%1 = lshr i64 %x, 41
%2 = lshr i64 %y, 41
%result = urem i64 %1, %2
store i64 %result, i64 addrspace(1)* %out
store i64 %result, ptr addrspace(1) %out
ret void
}

;EG-LABEL: {{^}}test_udiv_k:
; Kernel: unsigned 64-bit division with a constant dividend: stores 24 / %x
; to %out (the kernel argument is the divisor).
define amdgpu_kernel void @test_udiv_k(i64 addrspace(1)* %out, i64 %x) {
define amdgpu_kernel void @test_udiv_k(ptr addrspace(1) %out, i64 %x) {
%result = udiv i64 24, %x
store i64 %result, i64 addrspace(1)* %out
store i64 %result, ptr addrspace(1) %out
ret void
}
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s

; CHECK-LABEL: {{^}}kernel_arg_i64:
; Passes a scalar i64 kernel argument straight through to global memory
; with an 8-byte aligned store.
define amdgpu_kernel void @kernel_arg_i64(ptr addrspace(1) %out, i64 %a) nounwind {
  store i64 %a, ptr addrspace(1) %out, align 8
  ret void
}

; i64 arg works, v1i64 arg does not.
; CHECK-LABEL: {{^}}kernel_arg_v1i64:
; Same pass-through as the scalar i64 kernel, but the argument is a
; single-element vector (<1 x i64>) stored with 8-byte alignment.
define amdgpu_kernel void @kernel_arg_v1i64(ptr addrspace(1) %out, <1 x i64> %a) nounwind {
  store <1 x i64> %a, ptr addrspace(1) %out, align 8
  ret void
}

26 changes: 13 additions & 13 deletions llvm/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,29 @@
; EG: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
; CM: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00

; Loads one i32 from a global (addrspace 1) pointer and stores it back to
; global memory; the encoding checks above are matched against this load.
define amdgpu_kernel void @vtx_fetch32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  %v = load i32, ptr addrspace(1) %in
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}vtx_fetch128:
; EG: VTX_READ_128 T[[DST:[0-9]]].XYZW, T[[SRC:[0-9]]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[SRC]],0x40,0x0[[DST]],0x10,0x8d,0x18,0x00,0x00,0x08,0x00
; CM: VTX_READ_128 T[[DST:[0-9]]].XYZW, T[[SRC:[0-9]]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[SRC]],0x00,0x0[[DST]],0x10,0x8d,0x18,0x00,0x00,0x00,0x00

; Loads a full <4 x i32> (128 bits) from a global (addrspace 1) pointer and
; stores it back; the encoding checks above are matched against this load.
define amdgpu_kernel void @vtx_fetch128(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  %v = load <4 x i32>, ptr addrspace(1) %in
  store <4 x i32> %v, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}vtx_fetch32_id3:
; EG: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #3 ; encoding: [0x40,0x03,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
; CM: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #3 ; encoding: [0x40,0x03,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00

; Loads one i32 through an addrspace(7) pointer and stores it to global
; memory. The checks above expect the read to carry "#3" instead of "#1"
; (presumably the fetch/resource id, going by the test name).
define amdgpu_kernel void @vtx_fetch32_id3(ptr addrspace(1) %out, ptr addrspace(7) %in) {
  %v = load i32, ptr addrspace(7) %in
  store i32 %v, ptr addrspace(1) %out
  ret void
}

Expand All @@ -38,9 +38,9 @@ define amdgpu_kernel void @vtx_fetch32_id3(i32 addrspace(1)* %out, i32 addrspace

; Four-entry constant i32 table placed in addrspace(4); indexed by the kernel
; below.
@t = internal addrspace(4) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3]

; Reads element %in of the constant table @t and stores it to global memory.
define amdgpu_kernel void @vtx_fetch32_id2(ptr addrspace(1) %out, i32 %in) {
  %a = getelementptr inbounds [4 x i32], ptr addrspace(4) @t, i32 0, i32 %in
  %v = load i32, ptr addrspace(4) %a
  store i32 %v, ptr addrspace(1) %out
  ret void
}
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/AMDGPU/xfail.r600.bitcast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@

; TODO: enable doubles
; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
; Loads a double from global memory, adds 4.0, reinterprets the 64-bit result
; as <2 x i32> and stores it with 8-byte alignment.
define amdgpu_kernel void @bitcast_f64_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  %val = load double, ptr addrspace(1) %in, align 8
  %add = fadd double %val, 4.0
  %bc = bitcast double %add to <2 x i32>
  store <2 x i32> %bc, ptr addrspace(1) %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}bitcast_v2i64_to_v2f64:
define amdgpu_kernel void @bitcast_v2i64_to_v2f64(i32 %cond, <2 x double> addrspace(1)* %out, <2 x i64> %value) {
define amdgpu_kernel void @bitcast_v2i64_to_v2f64(i32 %cond, ptr addrspace(1) %out, <2 x i64> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
Expand All @@ -25,12 +25,12 @@ if:

end:
%phi = phi <2 x double> [zeroinitializer, %entry], [%cast, %if]
store <2 x double> %phi, <2 x double> addrspace(1)* %out
store <2 x double> %phi, ptr addrspace(1) %out
ret void
}

; FUNC-LABEL: {{^}}bitcast_v2f64_to_v2i64:
define amdgpu_kernel void @bitcast_v2f64_to_v2i64(i32 %cond, <2 x i64> addrspace(1)* %out, <2 x double> %value) {
define amdgpu_kernel void @bitcast_v2f64_to_v2i64(i32 %cond, ptr addrspace(1) %out, <2 x double> %value) {
entry:
%cmp0 = icmp eq i32 %cond, 0
br i1 %cmp0, label %if, label %end
Expand All @@ -41,6 +41,6 @@ if:

end:
%phi = phi <2 x i64> [zeroinitializer, %entry], [%cast, %if]
store <2 x i64> %phi, <2 x i64> addrspace(1)* %out
store <2 x i64> %phi, ptr addrspace(1) %out
ret void
}