diff --git a/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll b/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll
index 34eb088b16f494..eb29e0ac8ec68c 100644
--- a/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll
@@ -4,7 +4,7 @@
 ; BFI_INT Definition pattern from ISA docs
 ; (y & x) | (z & ~x)
 ;
-define amdgpu_kernel void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @bfi_def(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
 ; R600-LABEL: bfi_def:
 ; R600:       ; %bb.0: ; %entry
 ; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -20,13 +20,13 @@ entry:
   %1 = and i32 %z, %0
   %2 = and i32 %y, %x
   %3 = or i32 %1, %2
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
 ; SHA-256 Ch function
 ; z ^ (x & (y ^ z))
-define amdgpu_kernel void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @bfi_sha256_ch(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
 ; R600-LABEL: bfi_sha256_ch:
 ; R600:       ; %bb.0: ; %entry
 ; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
@@ -41,13 +41,13 @@ entry:
   %0 = xor i32 %y, %z
   %1 = and i32 %x, %0
   %2 = xor i32 %z, %1
-  store i32 %2, i32 addrspace(1)* %out
+  store i32 %2, ptr addrspace(1) %out
   ret void
 }
 
 ; SHA-256 Ma function
 ; ((x & z) | (y & (x | z)))
-define amdgpu_kernel void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+define amdgpu_kernel void @bfi_sha256_ma(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z) {
 ; R600-LABEL: bfi_sha256_ma:
 ; R600:       ; %bb.0: ; %entry
 ; R600-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
@@ -64,7 +64,7 @@ entry:
   %1 = or i32 %x, %z
   %2 = and i32 %y, %1
   %3 = or i32 %0, %2
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
@@ -149,7 +149,7 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
   %and1 = and i64 %not.a, %mask
   %bitselect = or i64 %and0, %and1
 
   %scalar.use = add i64 %bitselect, 10
-  store i64 %scalar.use, i64 addrspace(1)* undef
+  store i64 %scalar.use, ptr addrspace(1) undef
   ret void
 }
 
@@ -176,7 +176,7 @@ define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
   %bitselect = xor i64 %and, %mask
 
   %scalar.use = add i64 %bitselect, 10
-  store i64 %scalar.use, i64 addrspace(1)* undef
+  store i64 %scalar.use, ptr addrspace(1) undef
   ret void
 }
 
@@ -203,7 +203,7 @@ define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
   %bitselect = xor i64 %and, %mask
 
   %scalar.use = add i64 %bitselect, 10
-  store i64 %scalar.use, i64 addrspace(1)* undef
+  store i64 %scalar.use, ptr addrspace(1) undef
   ret void
 }
 
@@ -232,6 +232,6 @@ entry:
   %or1 = or i64 %and0, %and1
 
   %scalar.use = add i64 %or1, 10
-  store i64 %scalar.use, i64 addrspace(1)* undef
+  store i64 %scalar.use, ptr addrspace(1) undef
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/big_alu.ll b/llvm/test/CodeGen/AMDGPU/big_alu.ll
index 51387c8b79cbff..999c56645f7e1d 100644
--- a/llvm/test/CodeGen/AMDGPU/big_alu.ll
+++ b/llvm/test/CodeGen/AMDGPU/big_alu.ll
@@ -50,29 +50,29 @@ main_body:
   %tmp43 = extractelement <4 x float> %reg7, i32 1
   %tmp44 = extractelement <4 x float> %reg7, i32 2
   %tmp45 = extractelement <4 x float> %reg7, i32 3
-  %tmp46 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %tmp46 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 11)
   %tmp47 = extractelement <4 x float> %tmp46, i32 0
-  %tmp48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %tmp48 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 11)
   %tmp49 = extractelement <4 x float> %tmp48, i32 1
-  %tmp50 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
+  %tmp50 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 11)
   %tmp51 = extractelement <4 x float> %tmp50, i32 2
-  %tmp52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
+  %tmp52 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 12)
   %tmp53 = extractelement <4 x float> %tmp52, i32 0
-  %tmp54 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %tmp54 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 14)
   %tmp55 = extractelement <4 x float> %tmp54, i32 0
-  %tmp56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %tmp56 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 14)
   %tmp57 = extractelement <4 x float> %tmp56, i32 1
-  %tmp58 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %tmp58 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 14)
   %tmp59 = extractelement <4 x float> %tmp58, i32 2
-  %tmp60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
+  %tmp60 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 14)
   %tmp61 = extractelement <4 x float> %tmp60, i32 3
-  %tmp62 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %tmp62 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 16)
   %tmp63 = extractelement <4 x float> %tmp62, i32 0
-  %tmp64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %tmp64 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 16)
   %tmp65 = extractelement <4 x float> %tmp64, i32 1
-  %tmp66 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %tmp66 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 16)
   %tmp67 = extractelement <4 x float> %tmp66, i32 2
-  %tmp68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %tmp68 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
   %tmp69 = extractelement <4 x float> %tmp68, i32 0
   %tmp70 = fcmp oge float %tmp69, 3.500000e+00
   %tmp71 = sext i1 %tmp70 to i32
@@ -80,7 +80,7 @@ main_body:
   %tmp73 = bitcast float %tmp72 to i32
   %tmp74 = icmp ne i32 %tmp73, 0
   %. = select i1 %tmp74, float 0.000000e+00, float 0.000000e+00
-  %tmp75 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %tmp75 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
   %tmp76 = extractelement <4 x float> %tmp75, i32 0
   %tmp77 = fcmp oge float %tmp76, 2.000000e+00
   %tmp78 = sext i1 %tmp77 to i32
@@ -134,7 +134,7 @@ IF137:                                            ; preds = %main_body
   %tmp123 = insertelement <4 x float> %tmp122, float 0.000000e+00, i32 3
   %tmp124 = call float @llvm.r600.dot4(<4 x float> %tmp119, <4 x float> %tmp123)
   %tmp125 = fdiv float 1.000000e+00, %tmp124
-  %tmp126 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
+  %tmp126 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
   %tmp127 = extractelement <4 x float> %tmp126, i32 0
   %tmp128 = fmul float %tmp127, %tmp125
   %tmp129 = fmul float %tmp103, %tmp128
@@ -360,15 +360,15 @@ ENDIF136:                                         ; preds = %ENDIF154, %main_bod
   %tmp333 = fmul float %tmp318, %tmp332
   %tmp334 = fmul float %tmp320, %tmp332
   %tmp335 = fmul float %tmp322, %tmp332
-  %tmp336 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
+  %tmp336 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
   %tmp337 = extractelement <4 x float> %tmp336, i32 0
   %tmp338 = fsub float -0.000000e+00, %tmp337
   %tmp339 = fadd float 1.000000e+00, %tmp338
-  %tmp340 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
+  %tmp340 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
   %tmp341 = extractelement <4 x float> %tmp340, i32 0
   %tmp342 = fsub float -0.000000e+00, %tmp341
   %tmp343 = fadd float 1.000000e+00, %tmp342
-  %tmp344 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %tmp344 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8)
   %tmp345 = extractelement <4 x float> %tmp344, i32 0
   %tmp346 = fsub float -0.000000e+00, %tmp345
   %tmp347 = fadd float 1.000000e+00, %tmp346
@@ -1136,7 +1136,7 @@ ENDIF175:                                         ; preds = %IF176, %ENDIF172
   %temp92.11 = phi float [ %tmp856, %IF176 ], [ %temp92.10, %ENDIF172 ]
   %temp93.5 = phi float [ %tmp857, %IF176 ], [ %temp93.4, %ENDIF172 ]
   %temp94.5 = phi float [ %tmp858, %IF176 ], [ %temp94.4, %ENDIF172 ]
-  %tmp859 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
+  %tmp859 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 10)
   %tmp860 = extractelement <4 x float> %tmp859, i32 0
   %tmp861 = fcmp olt float %tmp860, %tmp181
   %tmp862 = sext i1 %tmp861 to i32
@@ -1257,12 +1257,12 @@ ENDIF178:                                         ; preds = %IF179, %ENDIF175
   %tmp931 = fmul float %temp87.6, %tmp927
   %tmp932 = fmul float %tmp2, -2.000000e+00
   %tmp933 = fadd float %tmp932, 1.000000e+00
-  %tmp934 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 23)
+  %tmp934 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 23)
   %tmp935 = extractelement <4 x float> %tmp934, i32 2
   %tmp936 = fsub float -0.000000e+00, %tmp935
   %tmp937 = fadd float %tmp933, %tmp936
   %tmp938 = fdiv float 1.000000e+00, %tmp937
-  %tmp939 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 24)
+  %tmp939 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 24)
   %tmp940 = extractelement <4 x float> %tmp939, i32 2
   %tmp941 = fmul float %tmp940, %tmp938
   %tmp942 = fsub float -0.000000e+00, %tmp53
diff --git a/llvm/test/CodeGen/AMDGPU/elf.r600.ll b/llvm/test/CodeGen/AMDGPU/elf.r600.ll
index 27631072463045..503d518d1ef82d 100644
--- a/llvm/test/CodeGen/AMDGPU/elf.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf.r600.ll
@@ -9,9 +9,9 @@
 ; CONFIG-NEXT: .long 2
 ; CONFIG-NEXT: .long 165900
 ; CONFIG-NEXT: .long 0
-define amdgpu_kernel void @test(float addrspace(1)* %out, i32 %p) {
+define amdgpu_kernel void @test(ptr addrspace(1) %out, i32 %p) {
   %i = add i32 %p, 2
   %r = bitcast i32 %i to float
-  store float %r, float addrspace(1)* %out
+  store float %r, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll b/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
index 5cb0c616d15f15..bb7257eb824a4f 100644
--- a/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fetch-limits.r600.ll
@@ -9,15 +9,15 @@
 
 define amdgpu_ps void @fetch_limits_r600() {
 entry:
-  %tmp = load <4 x float>, <4 x float> addrspace(8)* null
-  %tmp1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %tmp2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %tmp3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %tmp4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %tmp5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
-  %tmp6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
-  %tmp7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
-  %tmp8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
+  %tmp = load <4 x float>, ptr addrspace(8) null
+  %tmp1 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
+  %tmp2 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
+  %tmp3 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
+  %tmp4 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
+  %tmp5 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
+  %tmp6 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
+  %tmp7 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
+  %tmp8 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8)
   %tmp9 = shufflevector <4 x float> %tmp, <4 x float> %tmp, <4 x i32>
   %tmp10 = call <4 x float> @llvm.r600.tex(<4 x float> %tmp9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
   %tmp11 = shufflevector <4 x float> %tmp1, <4 x float> %tmp1, <4 x i32>
diff --git a/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll b/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
index d8f7c0daa8de49..e0afdc8c6ba7f4 100644
--- a/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
+++ b/llvm/test/CodeGen/AMDGPU/fetch-limits.r700+.ll
@@ -18,23 +18,23 @@
 
 define amdgpu_ps void @fetch_limits_r700() {
 entry:
-  %0 = load <4 x float>, <4 x float> addrspace(8)* null
-  %1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
-  %2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
-  %3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
-  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
-  %5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
-  %6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
-  %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
-  %8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)
-  %9 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
-  %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10)
-  %11 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 11)
-  %12 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 12)
-  %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 13)
-  %14 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 14)
-  %15 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 15)
-  %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 16)
+  %0 = load <4 x float>, ptr addrspace(8) null
+  %1 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
+  %2 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2)
+  %3 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3)
+  %4 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4)
+  %5 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5)
+  %6 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6)
+  %7 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7)
+  %8 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8)
+  %9 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
+  %10 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 10)
+  %11 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 11)
+  %12 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 12)
+  %13 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 13)
+  %14 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 14)
+  %15 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 15)
+  %16 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 16)
   %17 = shufflevector <4 x float> %0, <4 x float> %0, <4 x i32>
   %18 = call <4 x float> @llvm.r600.tex(<4 x float> %17, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
   %19 = shufflevector <4 x float> %1, <4 x float> %1, <4 x i32>
diff --git a/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll b/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
index 71bb4afa64ef05..047e27be787e67 100644
--- a/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmaxnum.r600.ll
@@ -3,9 +3,9 @@
 ; EG-LABEL: {{^}}test_fmax_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MAX_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmax_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @test_fmax_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
   %val = call float @llvm.maxnum.f32(float %a, float %b)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -13,9 +13,9 @@ define amdgpu_kernel void @test_fmax_f32(float addrspace(1)* %out, float %a, flo
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
 ; EG: MAX_DX10 {{.*}}[[OUT]]
 ; EG: MAX_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
   %val = call <2 x float> @llvm.maxnum.v2f32(<2 x float> %a, <2 x float> %b)
-  store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
+  store <2 x float> %val, ptr addrspace(1) %out, align 8
   ret void
 }
 
@@ -25,9 +25,9 @@ define amdgpu_kernel void @test_fmax_v2f32(<2 x float> addrspace(1)* %out, <2 x
 ; EG: MAX_DX10 {{.*}}[[OUT]]
 ; EG: MAX_DX10 {{.*}}[[OUT]]
 ; EG: MAX_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) #0 {
   %val = call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %b)
-  store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
+  store <4 x float> %val, ptr addrspace(1) %out, align 16
   ret void
 }
 
@@ -42,9 +42,9 @@ define amdgpu_kernel void @test_fmax_v4f32(<4 x float> addrspace(1)* %out, <4 x
 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Y
 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].Z
 ; EG-DAG: MAX_DX10 {{.*}}[[OUT2]].W
-define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) #0 {
   %val = call <8 x float> @llvm.maxnum.v8f32(<8 x float> %a, <8 x float> %b)
-  store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
+  store <8 x float> %val, ptr addrspace(1) %out, align 32
   ret void
 }
 
@@ -69,9 +69,9 @@ define amdgpu_kernel void @test_fmax_v8f32(<8 x float> addrspace(1)* %out, <8 x
 ; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Y
 ; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].Z
 ; EG-DAG: MAX_DX10 {{.*}}[[OUT4]].W
-define amdgpu_kernel void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
+define amdgpu_kernel void @test_fmax_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) #0 {
   %val = call <16 x float> @llvm.maxnum.v16f32(<16 x float> %a, <16 x float> %b)
-  store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
+  store <16 x float> %val, ptr addrspace(1) %out, align 64
   ret void
 }
 
@@ -79,9 +79,9 @@ define amdgpu_kernel void @test_fmax_v16f32(<16 x float> addrspace(1)* %out, <16
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 1.0, float 2.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -90,9 +90,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32(float addrspace(1)* %out) #0 {
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
 ; EG: 2143289344(nan)
-define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -100,9 +100,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_nan_nan(float addrspace(1)* %o
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 1.0, float 0x7FF8000000000000)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -110,9 +110,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_val_nan(float addrspace(1)* %o
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 0x7FF8000000000000, float 1.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -120,9 +120,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_nan_val(float addrspace(1)* %o
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 0.0, float 0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -130,9 +130,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_p0_p0(float addrspace(1)* %out
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float 0.0, float -0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -140,9 +140,9 @@ define amdgpu_kernel void @constant_fold_fmax_f32_p0_n0(float addrspace(1)* %out
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float -0.0, float 0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -150,45 +150,45 @@ define amdgpu_kernel void @constant_fold_fmax_f32_n0_p0(float addrspace(1)* %out
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MAX_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmax_f32_n0_n0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.maxnum.f32(float -0.0, float -0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmax_var_immediate_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MAX_DX10 * [[OUT]]
-define amdgpu_kernel void @fmax_var_immediate_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmax_var_immediate_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.maxnum.f32(float %a, float 2.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmax_immediate_var_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmax_immediate_var_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmax_immediate_var_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.maxnum.f32(float 2.0, float %a)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmax_var_literal_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmax_var_literal_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmax_var_literal_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.maxnum.f32(float %a, float 99.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmax_literal_var_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MAX_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmax_literal_var_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmax_literal_var_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.maxnum.f32(float 99.0, float %a)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll b/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
index 713e95c7f46e15..8d85af091f538f 100644
--- a/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/fminnum.r600.ll
@@ -3,9 +3,9 @@
 ; EG-LABEL: {{^}}test_fmin_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MIN_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmin_f32(float addrspace(1)* %out, float %a, float %b) #0 {
+define amdgpu_kernel void @test_fmin_f32(ptr addrspace(1) %out, float %a, float %b) #0 {
   %val = call float @llvm.minnum.f32(float %a, float %b)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -13,9 +13,9 @@ define amdgpu_kernel void @test_fmin_f32(float addrspace(1)* %out, float %a, flo
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+]]
 ; EG: MIN_DX10 {{.*}}[[OUT]]
 ; EG: MIN_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v2f32(ptr addrspace(1) %out, <2 x float> %a, <2 x float> %b) #0 {
   %val = call <2 x float> @llvm.minnum.v2f32(<2 x float> %a, <2 x float> %b)
-  store <2 x float> %val, <2 x float> addrspace(1)* %out, align 8
+  store <2 x float> %val, ptr addrspace(1) %out, align 8
   ret void
 }
 
@@ -25,9 +25,9 @@ define amdgpu_kernel void @test_fmin_v2f32(<2 x float> addrspace(1)* %out, <2 x
 ; EG: MIN_DX10 {{.*}}[[OUT]]
 ; EG: MIN_DX10 {{.*}}[[OUT]]
 ; EG: MIN_DX10 {{.*}}[[OUT]]
-define amdgpu_kernel void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v4f32(ptr addrspace(1) %out, <4 x float> %a, <4 x float> %b) #0 {
   %val = call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %b)
-  store <4 x float> %val, <4 x float> addrspace(1)* %out, align 16
+  store <4 x float> %val, ptr addrspace(1) %out, align 16
   ret void
 }
 
@@ -42,9 +42,9 @@ define amdgpu_kernel void @test_fmin_v4f32(<4 x float> addrspace(1)* %out, <4 x
 ; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Y
 ; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].Z
 ; EG-DAG: MIN_DX10 {{.*}}[[OUT2]].W
-define amdgpu_kernel void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, <8 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v8f32(ptr addrspace(1) %out, <8 x float> %a, <8 x float> %b) #0 {
   %val = call <8 x float> @llvm.minnum.v8f32(<8 x float> %a, <8 x float> %b)
-  store <8 x float> %val, <8 x float> addrspace(1)* %out, align 32
+  store <8 x float> %val, ptr addrspace(1) %out, align 32
   ret void
 }
 
@@ -69,9 +69,9 @@ define amdgpu_kernel void @test_fmin_v8f32(<8 x float> addrspace(1)* %out, <8 x
 ; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Y
 ; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].Z
 ; EG-DAG: MIN_DX10 {{.*}}[[OUT4]].W
-define amdgpu_kernel void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, <16 x float> %b) #0 {
+define amdgpu_kernel void @test_fmin_v16f32(ptr addrspace(1) %out, <16 x float> %a, <16 x float> %b) #0 {
   %val = call <16 x float> @llvm.minnum.v16f32(<16 x float> %a, <16 x float> %b)
-  store <16 x float> %val, <16 x float> addrspace(1)* %out, align 64
+  store <16 x float> %val, ptr addrspace(1) %out, align 64
   ret void
 }
 
@@ -79,9 +79,9 @@ define amdgpu_kernel void @test_fmin_v16f32(<16 x float> addrspace(1)* %out, <16
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float 1.0, float 2.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -90,9 +90,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32(float addrspace(1)* %out) #0 {
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
 ; EG: 2143289344({{nan|1\.#QNAN0e\+00}})
-define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 0x7FF8000000000000)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -100,9 +100,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32_nan_nan(float addrspace(1)* %o
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float 1.0, float 0x7FF8000000000000)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -110,9 +110,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32_val_nan(float addrspace(1)* %o
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float 0x7FF8000000000000, float 1.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -120,9 +120,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32_nan_val(float addrspace(1)* %o
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float 0.0, float 0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -130,9 +130,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32_p0_p0(float addrspace(1)* %out
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float 0.0, float -0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -140,9 +140,9 @@ define amdgpu_kernel void @constant_fold_fmin_f32_p0_n0(float addrspace(1)* %out
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float -0.0, float 0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
@@ -150,45 +150,45 @@ define amdgpu_kernel void @constant_fold_fmin_f32_n0_p0(float addrspace(1)* %out
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG-NOT: MIN_DX10
 ; EG: MOV {{.*}}[[OUT]], literal.{{[xy]}}
-define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(float addrspace(1)* %out) #0 {
+define amdgpu_kernel void @constant_fold_fmin_f32_n0_n0(ptr addrspace(1) %out) #0 {
   %val = call float @llvm.minnum.f32(float -0.0, float -0.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmin_var_immediate_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_var_immediate_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmin_var_immediate_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.minnum.f32(float %a, float 2.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmin_immediate_var_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_immediate_var_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmin_immediate_var_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.minnum.f32(float 2.0, float %a)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmin_var_literal_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_var_literal_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmin_var_literal_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.minnum.f32(float %a, float 99.0)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}fmin_literal_var_f32:
 ; EG: MEM_RAT_CACHELESS STORE_RAW [[OUT:T[0-9]+\.[XYZW]]]
 ; EG: MIN_DX10 {{.*}}[[OUT]], {{KC0\[[0-9]\].[XYZW]}}, literal.{{[xy]}}
-define amdgpu_kernel void @fmin_literal_var_f32(float addrspace(1)* %out, float %a) #0 {
+define amdgpu_kernel void @fmin_literal_var_f32(ptr addrspace(1) %out, float %a) #0 {
   %val = call float @llvm.minnum.f32(float 99.0, float %a)
-  store float %val, float addrspace(1)* %out, align 4
+  store float %val, ptr addrspace(1) %out, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
index b5a0de95acf50d..9781e3fc828fdb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.cube.ll
@@ -7,15 +7,15 @@
 ; CHECK: CUBE * T{{[0-9]}}.W
 define amdgpu_ps void @cube() {
 main_body:
-  %tmp = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %tmp = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
   %tmp1 = extractelement <4 x float> %tmp, i32 3
-  %tmp2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %tmp2 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
   %tmp3 = extractelement <4 x float> %tmp2, i32 0
   %tmp4 = fdiv float %tmp3, %tmp1
-  %tmp5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %tmp5 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
   %tmp6 = extractelement <4 x float> %tmp5, i32 1
   %tmp7 = fdiv float %tmp6, %tmp1
-  %tmp8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9)
+  %tmp8 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9)
   %tmp9 = extractelement <4 x float> %tmp8, i32 2
   %tmp10 = fdiv float %tmp9, %tmp1
   %tmp11 = insertelement <4 x float> undef, float %tmp4, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll
index de8a47741c9470..21c8ea6c281f4f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.dot4.ll
@@ -2,10 +2,10 @@
 
 declare float @llvm.r600.dot4(<4 x float>, <4 x float>) nounwind readnone
 
-define amdgpu_kernel void @test_dp4(float addrspace(1)* %out, <4 x float> addrspace(1)* %a, <4 x float> addrspace(1)* %b) nounwind {
-  %src0 = load <4 x float>, <4 x float> addrspace(1)* %a, align 16
-  %src1 = load <4 x float>, <4 x float> addrspace(1)* %b, align 16
+define amdgpu_kernel void @test_dp4(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) nounwind {
+  %src0 = load <4 x float>, ptr addrspace(1) %a, align 16
+  %src1 = load <4 x float>, ptr addrspace(1) %b, align 16
   %dp4 = call float @llvm.r600.dot4(<4 x float> %src0, <4 x float> %src1) nounwind readnone
-  store float %dp4, float addrspace(1)* %out, align 4
+  store float %dp4, ptr addrspace(1) %out, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
index 93caafbb952402..7859bac7bc5250 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.group.barrier.ll
@@ -2,18 +2,18 @@
 
 ; EG-LABEL: {{^}}test_group_barrier:
 ; EG: GROUP_BARRIER
-define amdgpu_kernel void @test_group_barrier(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @test_group_barrier(ptr addrspace(1) %out) #0 {
 entry:
   %tmp = call i32 @llvm.r600.read.tidig.x()
-  %tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp
-  store i32 %tmp, i32 addrspace(1)* %tmp1
+  %tmp1 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp
+  store i32 %tmp, ptr addrspace(1) %tmp1
   call void @llvm.r600.group.barrier()
   %tmp2 = call i32 @llvm.r600.read.local.size.x()
   %tmp3 = sub i32 %tmp2, 1
   %tmp4 = sub i32 %tmp3, %tmp
-  %tmp5 = getelementptr i32, i32 addrspace(1)* %out, i32 %tmp4
-  %tmp6 = load i32, i32 addrspace(1)* %tmp5
-  store i32 %tmp6, i32 addrspace(1)* %tmp1
+  %tmp5 = getelementptr i32, ptr addrspace(1) %out, i32 %tmp4
+  %tmp6 = load i32, ptr addrspace(1) %tmp5
+  store i32 %tmp6, ptr addrspace(1) %tmp1
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
index 7f18fec3db6d54..64f2fd26f1df0d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll
@@ -14,10 +14,10 @@
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[VVAL]]
 
-define amdgpu_kernel void @local_size_x(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_x(ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.r600.read.local.size.x() #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -29,10 +29,10 @@ entry:
 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_y(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_y(ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.r600.read.local.size.y() #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -44,10 +44,10 @@ entry:
 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_z(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_z(ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.r600.read.local.size.z() #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -57,12 +57,12 @@ entry:
 ; GCN: s_mul_i32 [[VAL:s[0-9]+]], s[[X]], s[[Y]]
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_xy(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_xy(ptr addrspace(1) %out) {
 entry:
   %x = call i32 @llvm.r600.read.local.size.x() #0
   %y = call i32 @llvm.r600.read.local.size.y() #0
   %val = mul i32 %x, %y
-  store i32 %val, i32 addrspace(1)* %out
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }
 
@@ -76,12 +76,12 @@ entry:
 ; GCN: s_mul_i32 [[VAL:s[0-9]+]], [[X]], [[Z]]
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_xz(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_xz(ptr addrspace(1) %out) {
 entry:
   %x = call i32 @llvm.r600.read.local.size.x() #0
   %z = call i32 @llvm.r600.read.local.size.z() #0
   %val = mul i32 %x, %z
-  store i32 %val, i32 addrspace(1)* %out
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }
 
@@ -94,12 +94,12 @@ entry:
 ; GCN: s_mul_i32 [[VAL:s[0-9]+]], s[[#LOAD + 0]], s[[#LOAD + 1]]
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_yz(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_yz(ptr addrspace(1) %out) {
 entry:
   %y = call i32 @llvm.r600.read.local.size.y() #0
   %z = call i32 @llvm.r600.read.local.size.z() #0
   %val = mul i32 %y, %z
-  store i32 %val, i32 addrspace(1)* %out
+  store i32 %val, ptr addrspace(1) %out
   ret void
 }
 
@@ -115,14 +115,14 @@ entry:
 ; GCN: s_add_i32 [[VAL:s[0-9]+]], [[M]], s[[Z]]
 ; GCN-DAG: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_xyz(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_xyz(ptr addrspace(1) %out) {
 entry:
   %x = call i32 @llvm.r600.read.local.size.x() #0
   %y = call i32 @llvm.r600.read.local.size.y() #0
   %z = call i32 @llvm.r600.read.local.size.z() #0
   %xy = mul i32 %x, %y
   %xyz = add i32 %xy, %z
-  store i32 %xyz, i32 addrspace(1)* %out
+  store i32 %xyz, ptr addrspace(1) %out
   ret void
 }
 
@@ -132,12 +132,12 @@ entry:
 ; GCN-NOT: 0xffff
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN-NEXT: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_x_known_bits(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_x_known_bits(ptr addrspace(1) %out) {
 entry:
   %size = call i32 @llvm.r600.read.local.size.x() #0
   %shl = shl i32 %size, 16
   %shr = lshr i32 %shl, 16
-  store i32 %shr, i32 addrspace(1)* %out
+  store i32 %shr, ptr addrspace(1) %out
   ret void
 }
 
@@ -147,12 +147,12 @@ entry:
 ; GCN-NOT: 0xffff
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN-NEXT: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_y_known_bits(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_y_known_bits(ptr addrspace(1) %out) {
 entry:
   %size = call i32 @llvm.r600.read.local.size.y() #0
   %shl = shl i32 %size, 16
   %shr = lshr i32 %shl, 16
-  store i32 %shr, i32 addrspace(1)* %out
+  store i32 %shr, ptr addrspace(1) %out
   ret void
 }
 
@@ -162,12 +162,12 @@ entry:
 ; GCN-NOT: 0xffff
 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
 ; GCN-NEXT: buffer_store_dword [[VVAL]]
-define amdgpu_kernel void @local_size_z_known_bits(i32 addrspace(1)* %out) {
+define amdgpu_kernel void @local_size_z_known_bits(ptr addrspace(1) %out) {
 entry:
   %size = call i32 @llvm.r600.read.local.size.z() #0
   %shl = shl i32 %size, 16
   %shr = lshr i32 %shl, 16
-  store i32 %shr, i32 addrspace(1)* %out
+  store i32 %shr, ptr addrspace(1) %out
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll
index 90d076d4fb4d87..f2c25c79c7808e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.clamped.ll
@@ -4,8 +4,8 @@ declare float @llvm.r600.recipsqrt.clamped.f32(float) nounwind readnone
 
 ; EG-LABEL: {{^}}rsq_clamped_f32:
 ; EG: RECIPSQRT_CLAMPED
-define amdgpu_kernel void @rsq_clamped_f32(float addrspace(1)* %out, float %src) nounwind {
+define amdgpu_kernel void @rsq_clamped_f32(ptr addrspace(1) %out, float %src) nounwind {
   %rsq_clamped = call float @llvm.r600.recipsqrt.clamped.f32(float %src)
-  store float %rsq_clamped, float addrspace(1)* %out, align 4
+  store float %rsq_clamped, ptr addrspace(1) %out, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll
index d9177b39b8ac0b..004d348dfbd24a 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.recipsqrt.ieee.ll
@@ -4,25 +4,25 @@ declare float @llvm.r600.recipsqrt.ieee.f32(float) nounwind readnone
 
 ; EG-LABEL: {{^}}recipsqrt.ieee_f32:
 ; EG: RECIPSQRT_IEEE
-define amdgpu_kernel void @recipsqrt.ieee_f32(float addrspace(1)* %out, float %src) nounwind {
+define amdgpu_kernel void @recipsqrt.ieee_f32(ptr addrspace(1) %out, float %src) nounwind {
   %recipsqrt.ieee = call float @llvm.r600.recipsqrt.ieee.f32(float %src) nounwind readnone
-  store float %recipsqrt.ieee, float addrspace(1)* %out, align 4
+  store float %recipsqrt.ieee, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; TODO: Really these should be constant folded
 ; EG-LABEL: {{^}}recipsqrt.ieee_f32_constant_4.0
 ; EG: RECIPSQRT_IEEE
-define amdgpu_kernel void @recipsqrt.ieee_f32_constant_4.0(float addrspace(1)* %out) nounwind {
+define amdgpu_kernel void @recipsqrt.ieee_f32_constant_4.0(ptr addrspace(1) %out) nounwind {
   %recipsqrt.ieee = call float @llvm.r600.recipsqrt.ieee.f32(float 4.0) nounwind readnone
-  store float %recipsqrt.ieee, float addrspace(1)* %out, align 4
+  store float %recipsqrt.ieee, ptr addrspace(1) %out, align 4
   ret void
 }
 
 ; EG-LABEL: {{^}}recipsqrt.ieee_f32_constant_100.0
 ; EG: RECIPSQRT_IEEE
-define amdgpu_kernel void @recipsqrt.ieee_f32_constant_100.0(float addrspace(1)* %out) nounwind {
+define amdgpu_kernel void @recipsqrt.ieee_f32_constant_100.0(ptr addrspace(1) %out) nounwind {
   %recipsqrt.ieee = call float @llvm.r600.recipsqrt.ieee.f32(float 100.0) nounwind readnone
-  store float %recipsqrt.ieee, float addrspace(1)* %out, align 4
+  store float %recipsqrt.ieee, ptr addrspace(1) %out, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll b/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
index 98044917e2b094..7ec4072f26224f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.r600.tex.ll
@@ -17,9 +17,9 @@
 ;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
 ;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNNN
 ;CHECK: TEX_SAMPLE T{{[0-9]+\.XYZW, T[0-9]+\.XYZW}} RID:0 SID:0 CT:NNUN
-define amdgpu_kernel void @test(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
+define amdgpu_kernel void @test(ptr addrspace(1) %out, ptr addrspace(1) %in) {
 bb:
-  %addr = load <4 x float>, <4 x float> addrspace(1)* %in
+  %addr = load <4 x float>, ptr addrspace(1) %in
   %tmp = shufflevector <4 x float> %addr, <4 x float> %addr, <4 x i32>
   %tmp1 = call <4 x float> @llvm.r600.tex(<4 x float> %tmp, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
   %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> %tmp1, <4 x i32>
@@ -52,7 +52,7 @@ bb:
   %tmp29 = call <4 x float> @llvm.r600.tex(<4 x float> %tmp28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1)
   %tmp30 = shufflevector <4 x float> %tmp29, <4 x float> %tmp29, <4 x i32>
   %tmp31 = call <4 x float> @llvm.r600.tex(<4 x float> %tmp30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0, i32 1)
-  store <4 x float> %tmp31, <4 x float> addrspace(1)* %out
+  store <4 x float> %tmp31, ptr addrspace(1) %out
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll b/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
index c8f4e4c986a76c..b0c8cc2810ff04 100644
--- a/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-memory.r600.ll
@@ -15,19 +15,19 @@
 ; EG-NEXT: ALU clause
 ; EG: LDS_READ_RET
 
-define amdgpu_kernel void @local_memory(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @local_memory(ptr addrspace(1) %out) #0 {
 entry:
   %y.i = call i32 @llvm.r600.read.tidig.x() #1
-  %arrayidx = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %y.i
-  store i32 %y.i, i32 addrspace(3)* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [128 x i32], ptr addrspace(3) @local_memory.local_mem, i32 0, i32 %y.i
+  store i32 %y.i, ptr addrspace(3) %arrayidx, align 4
   %add = add nsw i32 %y.i, 1
   %cmp = icmp eq i32 %add, 16
   %.add = select i1 %cmp, i32 0, i32 %add
   call void @llvm.r600.group.barrier()
-  %arrayidx1 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(3)* @local_memory.local_mem, i32 0, i32 %.add
-  %tmp = load i32, i32 addrspace(3)* %arrayidx1, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %y.i
-  store i32 %tmp, i32 addrspace(1)* %arrayidx2, align 4
+  %arrayidx1 = getelementptr inbounds [128 x i32], ptr addrspace(3) @local_memory.local_mem, i32 0, i32 %.add
+  %tmp = load i32, ptr addrspace(3) %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %y.i
+  store i32 %tmp, ptr addrspace(1) %arrayidx2, align 4
   ret void
 }
 
@@ -57,25 +57,25 @@ entry:
 ; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
 ; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
 
-define amdgpu_kernel void @local_memory_two_objects(i32 addrspace(1)* %out) #0 {
+define amdgpu_kernel void @local_memory_two_objects(ptr addrspace(1) %out) #0 {
 entry:
   %x.i = call i32 @llvm.r600.read.tidig.x() #1
-  %arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
-  store i32 %x.i, i32 addrspace(3)* %arrayidx, align 4
+  %arrayidx = getelementptr inbounds [4 x i32], ptr addrspace(3) @local_memory_two_objects.local_mem0, i32 0, i32 %x.i
+  store i32 %x.i, ptr addrspace(3) %arrayidx, align 4
   %mul = shl nsw i32 %x.i, 1
-  %arrayidx1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %x.i
-  store i32 %mul, i32 addrspace(3)* %arrayidx1, align 4
+  %arrayidx1 = getelementptr inbounds [4 x i32], ptr addrspace(3) @local_memory_two_objects.local_mem1, i32 0, i32 %x.i
+  store i32 %mul, ptr addrspace(3) %arrayidx1, align 4
   %sub = sub nsw i32 3, %x.i
   call void @llvm.r600.group.barrier()
-  %arrayidx2 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem0, i32 0, i32 %sub
-  %tmp = load i32, i32 addrspace(3)* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %x.i
-  store i32 %tmp, i32 addrspace(1)* %arrayidx3, align 4
-  %arrayidx4 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @local_memory_two_objects.local_mem1, i32 0, i32 %sub
-  %tmp1 = load i32, i32 addrspace(3)* %arrayidx4, align 4
+  %arrayidx2 = getelementptr inbounds [4 x i32], ptr addrspace(3) @local_memory_two_objects.local_mem0, i32 0, i32 %sub
+  %tmp = load i32, ptr addrspace(3) %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %x.i
+  store i32 %tmp, ptr addrspace(1) %arrayidx3, align 4
+  %arrayidx4 = getelementptr inbounds [4 x i32], ptr addrspace(3) @local_memory_two_objects.local_mem1, i32 0, i32 %sub
+  %tmp1 = load i32, ptr addrspace(3) %arrayidx4, align 4
   %add = add nsw i32 %x.i, 4
-  %arrayidx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %add
-  store i32 %tmp1, i32 addrspace(1)* %arrayidx5, align 4
+  %arrayidx5 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 %add
+  store i32 %tmp1, ptr addrspace(1) %arrayidx5, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll b/llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll
index 5bb21cb7595294..98a7bee2c6d67f 100644
--- a/llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_uint24-r600.ll
@@ -2,7 +2,7 @@
 ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefixes=CM %s
 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG %s
 
-define amdgpu_kernel void @test_umul24_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @test_umul24_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
 ; CM-LABEL: test_umul24_i32:
 ; CM:       ; %bb.0: ; %entry
 ; CM-NEXT:    ALU 7, @4, KC0[CB0:0-32], KC1[]
@@ -38,12 +38,12 @@ entry:
   %1 = shl i32 %b, 8
   %b_24 = lshr i32 %1, 8
   %2 = mul i32 %a_24, %b_24
-  store i32 %2, i32 addrspace(1)* %out
+  store i32 %2, ptr addrspace(1) %out
   ret void
 }
 
 ; The result must be sign-extended.
-define amdgpu_kernel void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a, i16 %b) {
+define amdgpu_kernel void @test_umul24_i16_sext(ptr addrspace(1) %out, i16 %a, i16 %b) {
 ; CM-LABEL: test_umul24_i16_sext:
 ; CM:       ; %bb.0: ; %entry
 ; CM-NEXT:    ALU 0, @10, KC0[], KC1[]
@@ -88,12 +88,12 @@ define amdgpu_kernel void @test_umul24_i16_sext(i32 addrspace(1)* %out, i16 %a,
 entry:
   %mul = mul i16 %a, %b
   %ext = sext i16 %mul to i32
-  store i32 %ext, i32 addrspace(1)* %out
+  store i32 %ext, ptr addrspace(1) %out
   ret void
 }
 
 ; The result must be sign-extended.
-define amdgpu_kernel void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b) {
+define amdgpu_kernel void @test_umul24_i8(ptr addrspace(1) %out, i8 %a, i8 %b) {
 ; CM-LABEL: test_umul24_i8:
 ; CM:       ; %bb.0: ; %entry
 ; CM-NEXT:    ALU 0, @10, KC0[], KC1[]
@@ -138,11 +138,11 @@ define amdgpu_kernel void @test_umul24_i8(i32 addrspace(1)* %out, i8 %a, i8 %b)
 entry:
   %mul = mul i8 %a, %b
   %ext = sext i8 %mul to i32
-  store i32 %ext, i32 addrspace(1)* %out
+  store i32 %ext, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_umulhi24_i32_i64(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+define amdgpu_kernel void @test_umulhi24_i32_i64(ptr addrspace(1) %out, i32 %a, i32 %b) {
 ; CM-LABEL: test_umulhi24_i32_i64:
 ; CM:       ; %bb.0: ; %entry
 ; CM-NEXT:    ALU 5, @4, KC0[CB0:0-32], KC1[]
@@ -175,11 +175,11 @@ entry:
   %mul48 = mul i64 %a.24.i64, %b.24.i64
   %mul48.hi = lshr i64 %mul48, 32
   %mul24hi = trunc i64 %mul48.hi to i32
-  store i32 %mul24hi, i32 addrspace(1)* %out
+  store i32 %mul24hi, ptr addrspace(1) %out
   ret void
 }
 
-define amdgpu_kernel void @test_umulhi24(i32 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @test_umulhi24(ptr addrspace(1) %out, i64 %a, i64 %b) {
 ; CM-LABEL: test_umulhi24:
 ; CM:       ; %bb.0: ; %entry
 ; CM-NEXT:    ALU 5, @4, KC0[CB0:0-32], KC1[]
@@ -210,12 +210,12 @@ entry:
   %mul48 = mul i64 %a.24, %b.24
   %mul48.hi = lshr i64 %mul48, 32
   %mul24.hi = trunc i64 %mul48.hi to i32
-  store i32 %mul24.hi, i32 addrspace(1)* %out
+  store i32 %mul24.hi, ptr addrspace(1) %out
   ret void
 }
 
 ; Multiply with 24-bit inputs and 64-bit output.
-define amdgpu_kernel void @test_umul24_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+define amdgpu_kernel void @test_umul24_i64(ptr addrspace(1) %out, i64 %a, i64 %b) {
 ; CM-LABEL: test_umul24_i64:
 ; CM:       ; %bb.0: ; %entry
 ; CM-NEXT:    ALU 12, @4, KC0[CB0:0-32], KC1[]
@@ -257,6 +257,6 @@ entry:
   %tmp1 = shl i64 %b, 40
   %b_24 = lshr i64 %tmp1, 40
   %tmp2 = mul i64 %a_24, %b_24
-  store i64 %tmp2, i64 addrspace(1)* %out
+  store i64 %tmp2, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll b/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll
index 4685e54d07f9e0..5af9f78b471c5e 100644
--- a/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll
+++ b/llvm/test/CodeGen/AMDGPU/parallelandifcollapse.ll
@@ -22,35 +22,35 @@ entry:
   %c1 = alloca i32, align 4, addrspace(5)
   %d1 = alloca i32, align 4, addrspace(5)
   %data = alloca i32, align 4, addrspace(5)
-  %0 = load i32, i32 addrspace(5)* %a0, align 4
-  %1 = load i32, i32 addrspace(5)* %b0, align 4
+  %0 = load i32, ptr addrspace(5) %a0, align 4
+  %1 = load i32, ptr addrspace(5) %b0, align 4
   %cmp = icmp ne i32 %0, %1
   br i1 %cmp, label %land.lhs.true, label %if.end
 
 land.lhs.true:                                    ; preds = %entry
-  %2 = load i32, i32 addrspace(5)* %c0, align 4
-  %3 = load i32, i32 addrspace(5)* %d0, align 4
+  %2 = load i32, ptr addrspace(5) %c0, align 4
+  %3 = load i32, ptr addrspace(5) %d0, align 4
   %cmp1 = icmp ne i32 %2, %3
   br i1 %cmp1, label %if.then, label %if.end
 
 if.then:                                          ; preds = %land.lhs.true
-  store i32 1, i32 addrspace(5)* %data, align 4
+  store i32 1, ptr addrspace(5) %data, align 4
   br label %if.end
 
 if.end:                                           ; preds = %if.then, %land.lhs.true, %entry
-  %4 = load i32, i32 addrspace(5)* %a1, align 4
-  %5 = load i32, i32 addrspace(5)* %b1, align 4
+  %4 = load i32, ptr addrspace(5) %a1, align 4
+  %5 = load i32, ptr addrspace(5) %b1, align 4
   %cmp2 = icmp ne i32 %4, %5
   br i1 %cmp2, label %land.lhs.true3, label %if.end6
 
land.lhs.true3:                                   ; preds = %if.end
-  %6 = load i32, i32 addrspace(5)* %c1, align 4
-  %7 = load i32, i32 addrspace(5)* %d1, align 4
+  %6 = load i32, ptr addrspace(5) %c1, align 4
+  %7 = load i32, ptr addrspace(5) %d1, align 4
   %cmp4 = icmp ne i32 %6, %7
   br i1 %cmp4, label %if.then5, label %if.end6
 
 if.then5:                                         ; preds = %land.lhs.true3
-  store i32 1, i32 addrspace(5)* %data, align 4
+  store i32 1, ptr addrspace(5) %data, align 4
   br label %if.end6
 
 if.end6:                                          ; preds = %if.then5, %land.lhs.true3, %if.end
diff --git a/llvm/test/CodeGen/AMDGPU/parallelorifcollapse.ll b/llvm/test/CodeGen/AMDGPU/parallelorifcollapse.ll
index 1d55c9ee074a0e..fc71e161ce691c 100644
--- a/llvm/test/CodeGen/AMDGPU/parallelorifcollapse.ll
+++ b/llvm/test/CodeGen/AMDGPU/parallelorifcollapse.ll
@@ -23,14 +23,14 @@ entry:
   %c1 = alloca i32, align 4, addrspace(5)
   %d1 = alloca i32, align 4, addrspace(5)
   %data = alloca i32, align 4, addrspace(5)
-  %0 = load i32, i32 addrspace(5)* %a0, align 4
-  %1 = load i32, i32 addrspace(5)* %b0, align 4
+  %0 = load i32, ptr addrspace(5) %a0, align 4
+  %1 = load i32, ptr addrspace(5) %b0, align 4
   %cmp = icmp ne i32 %0, %1
   br i1 %cmp, label %land.lhs.true, label %if.else
 
 land.lhs.true:                                    ; preds = %entry
-  %2 = load i32, i32 addrspace(5)* %c0, align 4
-  %3 = load i32, i32 addrspace(5)* %d0, align 4
+  %2 = load i32, ptr addrspace(5) %c0, align 4
+  %3 = load i32, ptr addrspace(5) %d0, align 4
   %cmp1 = icmp ne i32 %2, %3
   br i1 %cmp1, label %if.then, label %if.else
 
@@ -38,18 +38,18 @@ if.then:                                          ; preds = %land.lhs.true
   br label %if.end
 
 if.else:                                          ; preds = %land.lhs.true, %entry
-  store i32 1, i32 addrspace(5)* %data, align 4
+  store i32 1, ptr addrspace(5) %data, align 4
   br label %if.end
 
 if.end:                                           ; preds = %if.else, %if.then
-  %4 = load i32, i32 addrspace(5)* %a1, align 4
-  %5 = load i32, i32 addrspace(5)* %b1, align 4
+  %4 = load i32, ptr addrspace(5) %a1, align 4
+  %5 = load i32, ptr addrspace(5) %b1, align 4
   %cmp2 = icmp ne i32 %4, %5
   br i1 %cmp2, label %land.lhs.true3, label %if.else6
 
 land.lhs.true3:                                   ; preds = %if.end
-  %6 = load i32, i32 addrspace(5)* %c1, align 4
-  %7 = load i32, i32 addrspace(5)* %d1, align 4
+  %6 = load i32, ptr addrspace(5) %c1, align 4
+  %7 = load i32, ptr addrspace(5) %d1, align 4
   %cmp4 = icmp ne i32 %6, %7
   br i1 %cmp4, label %if.then5, label %if.else6
 
@@ -57,7 +57,7 @@ if.then5:                                         ; preds = %land.lhs.true3
   br label %if.end7
 
 if.else6:                                         ; preds = %land.lhs.true3, %if.end
-  store i32 1, i32 addrspace(5)* %data, align 4
+  store i32 1, ptr addrspace(5) %data, align 4
   br label %if.end7
 
 if.end7:                                          ; preds = %if.else6, %if.then5
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
index 6c8f308bd65eaa..81e8dfcdb3a4b9 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory-r600.ll
@@ -18,23 +18,22 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 ; OPT: call i32 @llvm.r600.read.tidig.y(), !range !1
 ; OPT: call i32 @llvm.r600.read.tidig.z(), !range !1
 
-define amdgpu_kernel void @mova_same_clause(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) #0 {
+define amdgpu_kernel void @mova_same_clause(ptr addrspace(1) nocapture %out, ptr addrspace(1) nocapture %in) #0 {
 entry:
   %stack = alloca [5 x i32], align 4, addrspace(5)
-  %0 = load i32, i32 addrspace(1)* %in, align 4
-  %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %0
-  store i32 4, i32 addrspace(5)* %arrayidx1, align 4
-  %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
-  %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
-  %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 %1
-  store i32 5, i32 addrspace(5)* %arrayidx3, align 4
-  %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 0
-  %2 = load i32, i32 addrspace(5)* %arrayidx10, align 4
-  store i32 %2, i32 addrspace(1)* %out, align 4
-  %arrayidx12 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(5)* %stack, i32 0, i32 1
-  %3 = load i32, i32 addrspace(5)* %arrayidx12
-  %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
-  store i32 %3, i32 addrspace(1)* %arrayidx13
+  %0 = load i32, ptr addrspace(1) %in, align 4
+  %arrayidx1 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 %0
+  store i32 4, ptr addrspace(5) %arrayidx1, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %in, i32 1
+  %1 = load i32, ptr addrspace(1) %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 %1
+  store i32 5, ptr addrspace(5) %arrayidx3, align 4
+  %2 = load i32, ptr addrspace(5) %stack, align 4
+  store i32 %2, ptr addrspace(1) %out, align 4
+  %arrayidx12 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 1
+  %3 = load i32, ptr addrspace(5) %arrayidx12
+  %arrayidx13 = getelementptr inbounds i32, ptr addrspace(1) %out, i32 1
+  store i32 %3, ptr addrspace(1) %arrayidx13
   ret void
 }
 
@@ -49,24 +48,20 @@ entry:
 ; R600-NOT: MOVA_INT
 
 %struct.point = type { i32, i32 }
 
-define amdgpu_kernel void @multiple_structs(i32 addrspace(1)*
%out) #0 { +define amdgpu_kernel void @multiple_structs(ptr addrspace(1) %out) #0 { entry: %a = alloca %struct.point, addrspace(5) %b = alloca %struct.point, addrspace(5) - %a.x.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 0 - %a.y.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 1 - %b.x.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 0 - %b.y.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %a.x.ptr - store i32 1, i32 addrspace(5)* %a.y.ptr - store i32 2, i32 addrspace(5)* %b.x.ptr - store i32 3, i32 addrspace(5)* %b.y.ptr - %a.indirect.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %a, i32 0, i32 0 - %b.indirect.ptr = getelementptr inbounds %struct.point, %struct.point addrspace(5)* %b, i32 0, i32 0 - %a.indirect = load i32, i32 addrspace(5)* %a.indirect.ptr - %b.indirect = load i32, i32 addrspace(5)* %b.indirect.ptr + %a.y.ptr = getelementptr inbounds %struct.point, ptr addrspace(5) %a, i32 0, i32 1 + %b.y.ptr = getelementptr inbounds %struct.point, ptr addrspace(5) %b, i32 0, i32 1 + store i32 0, ptr addrspace(5) %a + store i32 1, ptr addrspace(5) %a.y.ptr + store i32 2, ptr addrspace(5) %b + store i32 3, ptr addrspace(5) %b.y.ptr + %a.indirect = load i32, ptr addrspace(5) %a + %b.indirect = load i32, ptr addrspace(5) %b %0 = add i32 %a.indirect, %b.indirect - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } @@ -77,69 +72,63 @@ entry: ; FUNC-LABEL: {{^}}direct_loop: ; R600-NOT: MOVA_INT -define amdgpu_kernel void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @direct_loop(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { entry: %prv_array_const = alloca [2 x i32], addrspace(5) %prv_array = alloca [2 x i32], addrspace(5) - %a = load i32, i32 addrspace(1)* %in - %b_src_ptr = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1 - %b = load i32, i32 addrspace(1)* %b_src_ptr - %a_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 0 - store i32 %a, i32 addrspace(5)* %a_dst_ptr - %b_dst_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 1 - store i32 %b, i32 addrspace(5)* %b_dst_ptr + %a = load i32, ptr addrspace(1) %in + %b_src_ptr = getelementptr inbounds i32, ptr addrspace(1) %in, i32 1 + %b = load i32, ptr addrspace(1) %b_src_ptr + store i32 %a, ptr addrspace(5) %prv_array_const + %b_dst_ptr = getelementptr inbounds [2 x i32], ptr addrspace(5) %prv_array_const, i32 0, i32 1 + store i32 %b, ptr addrspace(5) %b_dst_ptr br label %for.body for.body: %inc = phi i32 [0, %entry], [%count, %for.body] - %x_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array_const, i32 0, i32 0 - %x = load i32, i32 addrspace(5)* %x_ptr - %y_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array, i32 0, i32 0 - %y = load i32, i32 addrspace(5)* %y_ptr + %x = load i32, ptr addrspace(5) %prv_array_const + %y = load i32, ptr addrspace(5) %prv_array %xy = add i32 %x, %y - store i32 %xy, i32 addrspace(5)* %y_ptr + store i32 %xy, ptr addrspace(5) %prv_array %count = add i32 %inc, 1 %done = icmp eq i32 %count, 4095 br i1 %done, label %for.end, label %for.body for.end: - %value_ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %prv_array, i32 0, i32 0 - %value = load i32, i32 
addrspace(5)* %value_ptr - store i32 %value, i32 addrspace(1)* %out + %value = load i32, ptr addrspace(5) %prv_array + store i32 %value, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}short_array: ; R600-VECT: MOVA_INT -define amdgpu_kernel void @short_array(i32 addrspace(1)* %out, i32 %index) #0 { +define amdgpu_kernel void @short_array(ptr addrspace(1) %out, i32 %index) #0 { entry: %0 = alloca [2 x i16], addrspace(5) - %1 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 0 - %2 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 1 - store i16 0, i16 addrspace(5)* %1 - store i16 1, i16 addrspace(5)* %2 - %3 = getelementptr inbounds [2 x i16], [2 x i16] addrspace(5)* %0, i32 0, i32 %index - %4 = load i16, i16 addrspace(5)* %3 - %5 = sext i16 %4 to i32 - store i32 %5, i32 addrspace(1)* %out + %1 = getelementptr inbounds [2 x i16], ptr addrspace(5) %0, i32 0, i32 1 + store i16 0, ptr addrspace(5) %0 + store i16 1, ptr addrspace(5) %1 + %2 = getelementptr inbounds [2 x i16], ptr addrspace(5) %0, i32 0, i32 %index + %3 = load i16, ptr addrspace(5) %2 + %4 = sext i16 %3 to i32 + store i32 %4, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}char_array: ; R600-VECT: MOVA_INT -define amdgpu_kernel void @char_array(i32 addrspace(1)* %out, i32 %index) #0 { +define amdgpu_kernel void @char_array(ptr addrspace(1) %out, i32 %index) #0 { entry: %0 = alloca [2 x i8], addrspace(5) - %1 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 0 - %2 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 1 - store i8 0, i8 addrspace(5)* %1 - store i8 1, i8 addrspace(5)* %2 - %3 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %0, i32 0, i32 %index - %4 = load i8, i8 addrspace(5)* %3 - %5 = sext i8 %4 to i32 - store i32 %5, i32 addrspace(1)* %out + %1 = getelementptr inbounds [2 x i8], ptr addrspace(5) %0, i32 0, i32 1 + store i8 0, ptr addrspace(5) %0 + store i8 1, ptr addrspace(5) %1 + %2 = getelementptr inbounds [2 x i8], ptr addrspace(5) %0, i32 0, i32 %index + %3 = load i8, ptr addrspace(5) %2 + %4 = sext i8 %3 to i32 + store i32 %4, ptr addrspace(1) %out ret void } @@ -150,127 +139,120 @@ entry: ; R600-NOT: MOV T0.X ; Additional check in case the move ends up in the last slot ; R600-NOT: MOV * TO.X -define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) #0 { +define amdgpu_kernel void @work_item_info(ptr addrspace(1) %out, i32 %in) #0 { entry: %0 = alloca [2 x i32], addrspace(5) - %1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0 - %2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %1 - store i32 1, i32 addrspace(5)* %2 - %3 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in - %4 = load i32, i32 addrspace(5)* %3 - %5 = call i32 @llvm.r600.read.tidig.x() - %6 = add i32 %4, %5 - store i32 %6, i32 addrspace(1)* %out + %1 = getelementptr inbounds [2 x i32], ptr addrspace(5) %0, i32 0, i32 1 + store i32 0, ptr addrspace(5) %0 + store i32 1, ptr addrspace(5) %1 + %2 = getelementptr inbounds [2 x i32], ptr addrspace(5) %0, i32 0, i32 %in + %3 = load i32, ptr addrspace(5) %2 + %4 = call i32 @llvm.r600.read.tidig.x() + %5 = add i32 %3, %4 + store i32 %5, ptr addrspace(1) %out ret void } ; Test that two stack objects are not stored in the same register ; The second stack object should be in T3.X ; FUNC-LABEL: {{^}}no_overlap: -define amdgpu_kernel void @no_overlap(i32 
addrspace(1)* %out, i32 %in) #0 { +define amdgpu_kernel void @no_overlap(ptr addrspace(1) %out, i32 %in) #0 { entry: %0 = alloca [3 x i8], align 1, addrspace(5) %1 = alloca [2 x i8], align 1, addrspace(5) - %2 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 0 - %3 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 1 - %4 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 2 - %5 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 0 - %6 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 1 - store i8 0, i8 addrspace(5)* %2 - store i8 1, i8 addrspace(5)* %3 - store i8 2, i8 addrspace(5)* %4 - store i8 1, i8 addrspace(5)* %5 - store i8 0, i8 addrspace(5)* %6 - %7 = getelementptr inbounds [3 x i8], [3 x i8] addrspace(5)* %0, i32 0, i32 %in - %8 = getelementptr inbounds [2 x i8], [2 x i8] addrspace(5)* %1, i32 0, i32 %in - %9 = load i8, i8 addrspace(5)* %7 - %10 = load i8, i8 addrspace(5)* %8 - %11 = add i8 %9, %10 - %12 = sext i8 %11 to i32 - store i32 %12, i32 addrspace(1)* %out + %2 = getelementptr inbounds [3 x i8], ptr addrspace(5) %0, i32 0, i32 1 + %3 = getelementptr inbounds [3 x i8], ptr addrspace(5) %0, i32 0, i32 2 + %4 = getelementptr inbounds [2 x i8], ptr addrspace(5) %1, i32 0, i32 1 + store i8 0, ptr addrspace(5) %0 + store i8 1, ptr addrspace(5) %2 + store i8 2, ptr addrspace(5) %3 + store i8 1, ptr addrspace(5) %1 + store i8 0, ptr addrspace(5) %4 + %5 = getelementptr inbounds [3 x i8], ptr addrspace(5) %0, i32 0, i32 %in + %6 = getelementptr inbounds [2 x i8], ptr addrspace(5) %1, i32 0, i32 %in + %7 = load i8, ptr addrspace(5) %5 + %8 = load i8, ptr addrspace(5) %6 + %9 = add i8 %7, %8 + %10 = sext i8 %9 to i32 + store i32 %10, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @char_array_array(i32 addrspace(1)* %out, i32 %index) #0 { +define amdgpu_kernel void @char_array_array(ptr addrspace(1) %out, i32 %index) #0 { entry: %alloca = alloca [2 x [2 x i8]], addrspace(5) - %gep0 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 0 - %gep1 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 1 - store i8 0, i8 addrspace(5)* %gep0 - store i8 1, i8 addrspace(5)* %gep1 - %gep2 = getelementptr inbounds [2 x [2 x i8]], [2 x [2 x i8]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index - %load = load i8, i8 addrspace(5)* %gep2 + %gep1 = getelementptr inbounds [2 x [2 x i8]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1 + store i8 0, ptr addrspace(5) %alloca + store i8 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr inbounds [2 x [2 x i8]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index + %load = load i8, ptr addrspace(5) %gep2 %sext = sext i8 %load to i32 - store i32 %sext, i32 addrspace(1)* %out + store i32 %sext, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @i32_array_array(i32 addrspace(1)* %out, i32 %index) #0 { +define amdgpu_kernel void @i32_array_array(ptr addrspace(1) %out, i32 %index) #0 { entry: %alloca = alloca [2 x [2 x i32]], addrspace(5) - %gep0 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0 - %gep1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index - 
%load = load i32, i32 addrspace(5)* %gep2 - store i32 %load, i32 addrspace(1)* %out + %gep1 = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1 + store i32 0, ptr addrspace(5) %alloca + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr inbounds [2 x [2 x i32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index + %load = load i32, ptr addrspace(5) %gep2 + store i32 %load, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @i64_array_array(i64 addrspace(1)* %out, i32 %index) #0 { +define amdgpu_kernel void @i64_array_array(ptr addrspace(1) %out, i32 %index) #0 { entry: %alloca = alloca [2 x [2 x i64]], addrspace(5) - %gep0 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 0 - %gep1 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 1 - store i64 0, i64 addrspace(5)* %gep0 - store i64 1, i64 addrspace(5)* %gep1 - %gep2 = getelementptr inbounds [2 x [2 x i64]], [2 x [2 x i64]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index - %load = load i64, i64 addrspace(5)* %gep2 - store i64 %load, i64 addrspace(1)* %out + %gep1 = getelementptr inbounds [2 x [2 x i64]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1 + store i64 0, ptr addrspace(5) %alloca + store i64 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr inbounds [2 x [2 x i64]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index + %load = load i64, ptr addrspace(5) %gep2 + store i64 %load, ptr addrspace(1) %out ret void } %struct.pair32 = type { i32, i32 } -define amdgpu_kernel void @struct_array_array(i32 addrspace(1)* %out, i32 %index) #0 { +define amdgpu_kernel void @struct_array_array(ptr addrspace(1) %out, i32 %index) #0 { entry: %alloca = alloca [2 x [2 x %struct.pair32]], addrspace(5) - %gep0 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 0, i32 1 - %gep1 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 1, i32 1 - store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr inbounds [2 x [2 x %struct.pair32]], [2 x [2 x %struct.pair32]] addrspace(5)* %alloca, i32 0, i32 0, i32 %index, i32 0 - %load = load i32, i32 addrspace(5)* %gep2 - store i32 %load, i32 addrspace(1)* %out + %gep0 = getelementptr inbounds [2 x [2 x %struct.pair32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 0, i32 1 + %gep1 = getelementptr inbounds [2 x [2 x %struct.pair32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1, i32 1 + store i32 0, ptr addrspace(5) %gep0 + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr inbounds [2 x [2 x %struct.pair32]], ptr addrspace(5) %alloca, i32 0, i32 0, i32 %index, i32 0 + %load = load i32, ptr addrspace(5) %gep2 + store i32 %load, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @struct_pair32_array(i32 addrspace(1)* %out, i32 %index) #0 { +define amdgpu_kernel void @struct_pair32_array(ptr addrspace(1) %out, i32 %index) #0 { entry: %alloca = alloca [2 x %struct.pair32], addrspace(5) - %gep0 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 0, i32 1 - %gep1 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32] addrspace(5)* %alloca, i32 0, i32 1, i32 0 - store i32 0, i32 addrspace(5)* %gep0 - store i32 1, i32 addrspace(5)* %gep1 - %gep2 = getelementptr inbounds [2 x %struct.pair32], [2 x %struct.pair32] 
addrspace(5)* %alloca, i32 0, i32 %index, i32 0 - %load = load i32, i32 addrspace(5)* %gep2 - store i32 %load, i32 addrspace(1)* %out + %gep0 = getelementptr inbounds [2 x %struct.pair32], ptr addrspace(5) %alloca, i32 0, i32 0, i32 1 + %gep1 = getelementptr inbounds [2 x %struct.pair32], ptr addrspace(5) %alloca, i32 0, i32 1, i32 0 + store i32 0, ptr addrspace(5) %gep0 + store i32 1, ptr addrspace(5) %gep1 + %gep2 = getelementptr inbounds [2 x %struct.pair32], ptr addrspace(5) %alloca, i32 0, i32 %index, i32 0 + %load = load i32, ptr addrspace(5) %gep2 + store i32 %load, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @select_private(i32 addrspace(1)* %out, i32 %in) nounwind { +define amdgpu_kernel void @select_private(ptr addrspace(1) %out, i32 %in) nounwind { entry: %tmp = alloca [2 x i32], addrspace(5) - %tmp1 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 0 - %tmp2 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(5)* %tmp, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %tmp1 - store i32 1, i32 addrspace(5)* %tmp2 + %tmp2 = getelementptr inbounds [2 x i32], ptr addrspace(5) %tmp, i32 0, i32 1 + store i32 0, ptr addrspace(5) %tmp + store i32 1, ptr addrspace(5) %tmp2 %cmp = icmp eq i32 %in, 0 - %sel = select i1 %cmp, i32 addrspace(5)* %tmp1, i32 addrspace(5)* %tmp2 - %load = load i32, i32 addrspace(5)* %sel - store i32 %load, i32 addrspace(1)* %out + %sel = select i1 %cmp, ptr addrspace(5) %tmp, ptr addrspace(5) %tmp2 + %load = load i32, ptr addrspace(5) %sel + store i32 %load, ptr addrspace(1) %out ret void } @@ -281,16 +263,16 @@ entry: ; SI-NOT: ds_write ; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; SI: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; -define amdgpu_kernel void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { +define amdgpu_kernel void @ptrtoint(ptr addrspace(1) %out, i32 %a, i32 %b) #0 { %alloca = alloca [16 x i32], addrspace(5) - %tmp0 = getelementptr inbounds [16 x i32], [16 x i32] addrspace(5)* %alloca, i32 0, i32 %a - store i32 5, i32 addrspace(5)* %tmp0 - %tmp1 = ptrtoint [16 x i32] addrspace(5)* %alloca to i32 + %tmp0 = getelementptr inbounds [16 x i32], ptr addrspace(5) %alloca, i32 0, i32 %a + store i32 5, ptr addrspace(5) %tmp0 + %tmp1 = ptrtoint ptr addrspace(5) %alloca to i32 %tmp2 = add i32 %tmp1, 5 - %tmp3 = inttoptr i32 %tmp2 to i32 addrspace(5)* - %tmp4 = getelementptr inbounds i32, i32 addrspace(5)* %tmp3, i32 %b - %tmp5 = load i32, i32 addrspace(5)* %tmp4 - store i32 %tmp5, i32 addrspace(1)* %out + %tmp3 = inttoptr i32 %tmp2 to ptr addrspace(5) + %tmp4 = getelementptr inbounds i32, ptr addrspace(5) %tmp3, i32 %b + %tmp5 = load i32, ptr addrspace(5) %tmp4 + store i32 %tmp5, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/pv.ll b/llvm/test/CodeGen/AMDGPU/pv.ll index 1474dbabba69fa..ed24e2f97c2c80 100644 --- a/llvm/test/CodeGen/AMDGPU/pv.ll +++ b/llvm/test/CodeGen/AMDGPU/pv.ll @@ -32,63 +32,63 @@ main_body: %tmp37 = extractelement <4 x float> %reg7, i32 1 %tmp38 = extractelement <4 x float> %reg7, i32 2 %tmp39 = extractelement <4 x float> %reg7, i32 3 - %tmp40 = load <4 x float>, <4 x float> addrspace(8)* null + %tmp40 = load <4 x float>, ptr addrspace(8) null %tmp41 = extractelement <4 x float> %tmp40, i32 0 %tmp42 = fmul float %tmp, %tmp41 - %tmp43 = load <4 x float>, <4 x float> addrspace(8)* null + %tmp43 = load <4 x float>, ptr addrspace(8) null %tmp44 = extractelement <4 x float> %tmp43, 
i32 1 %tmp45 = fmul float %tmp, %tmp44 - %tmp46 = load <4 x float>, <4 x float> addrspace(8)* null + %tmp46 = load <4 x float>, ptr addrspace(8) null %tmp47 = extractelement <4 x float> %tmp46, i32 2 %tmp48 = fmul float %tmp, %tmp47 - %tmp49 = load <4 x float>, <4 x float> addrspace(8)* null + %tmp49 = load <4 x float>, ptr addrspace(8) null %tmp50 = extractelement <4 x float> %tmp49, i32 3 %tmp51 = fmul float %tmp, %tmp50 - %tmp52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %tmp52 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1) %tmp53 = extractelement <4 x float> %tmp52, i32 0 %tmp54 = fmul float %tmp13, %tmp53 %tmp55 = fadd float %tmp54, %tmp42 - %tmp56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %tmp56 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1) %tmp57 = extractelement <4 x float> %tmp56, i32 1 %tmp58 = fmul float %tmp13, %tmp57 %tmp59 = fadd float %tmp58, %tmp45 - %tmp60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %tmp60 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1) %tmp61 = extractelement <4 x float> %tmp60, i32 2 %tmp62 = fmul float %tmp13, %tmp61 %tmp63 = fadd float %tmp62, %tmp48 - %tmp64 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1) + %tmp64 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1) %tmp65 = extractelement <4 x float> %tmp64, i32 3 %tmp66 = fmul float %tmp13, %tmp65 %tmp67 = fadd float %tmp66, %tmp51 - %tmp68 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %tmp68 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2) %tmp69 = extractelement <4 x float> %tmp68, i32 0 %tmp70 = fmul float %tmp14, %tmp69 %tmp71 = fadd float %tmp70, %tmp55 - %tmp72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %tmp72 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2) %tmp73 = extractelement <4 x float> %tmp72, i32 1 %tmp74 = fmul float %tmp14, %tmp73 %tmp75 = fadd float %tmp74, %tmp59 - %tmp76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %tmp76 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2) %tmp77 = extractelement <4 x float> %tmp76, i32 2 %tmp78 = fmul float %tmp14, %tmp77 %tmp79 = fadd float %tmp78, %tmp63 - %tmp80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %tmp80 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2) %tmp81 = extractelement <4 x float> %tmp80, i32 3 %tmp82 = fmul float %tmp14, %tmp81 %tmp83 = fadd float %tmp82, %tmp67 - %tmp84 = load <4 x float>, <4 
x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %tmp84 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3) %tmp85 = extractelement <4 x float> %tmp84, i32 0 %tmp86 = fmul float %tmp15, %tmp85 %tmp87 = fadd float %tmp86, %tmp71 - %tmp88 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %tmp88 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3) %tmp89 = extractelement <4 x float> %tmp88, i32 1 %tmp90 = fmul float %tmp15, %tmp89 %tmp91 = fadd float %tmp90, %tmp75 - %tmp92 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %tmp92 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3) %tmp93 = extractelement <4 x float> %tmp92, i32 2 %tmp94 = fmul float %tmp15, %tmp93 %tmp95 = fadd float %tmp94, %tmp79 - %tmp96 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %tmp96 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3) %tmp97 = extractelement <4 x float> %tmp96, i32 3 %tmp98 = fmul float %tmp15, %tmp97 %tmp99 = fadd float %tmp98, %tmp83 @@ -106,15 +106,15 @@ main_body: %tmp111 = fmul float %tmp16, %tmp110 %tmp112 = fmul float %tmp17, %tmp110 %tmp113 = fmul float %tmp18, %tmp110 - %tmp114 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %tmp114 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4) %tmp115 = extractelement <4 x float> %tmp114, i32 0 %tmp116 = fmul float %tmp115, %tmp20 %tmp117 = fadd float %tmp116, %tmp32 - %tmp118 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %tmp118 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4) %tmp119 = extractelement <4 x float> %tmp118, i32 1 %tmp120 = fmul float %tmp119, %tmp21 %tmp121 = fadd float %tmp120, %tmp33 - %tmp122 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %tmp122 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4) %tmp123 = extractelement <4 x float> %tmp122, i32 2 %tmp124 = fmul float %tmp123, %tmp22 %tmp125 = fadd float %tmp124, %tmp34 @@ -126,11 +126,11 @@ main_body: %clamp.i10 = call float @llvm.minnum.f32(float %max.0.i9, float 1.000000e+00) %max.0.i7 = call float @llvm.maxnum.f32(float %tmp27, float 0.000000e+00) %clamp.i8 = call float @llvm.minnum.f32(float %max.0.i7, float 1.000000e+00) - %tmp126 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %tmp126 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5) %tmp127 = extractelement <4 x float> %tmp126, i32 0 - %tmp128 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] 
addrspace(8)* null, i64 0, i32 5) + %tmp128 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5) %tmp129 = extractelement <4 x float> %tmp128, i32 1 - %tmp130 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %tmp130 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5) %tmp131 = extractelement <4 x float> %tmp130, i32 2 %tmp132 = insertelement <4 x float> undef, float %tmp111, i32 0 %tmp133 = insertelement <4 x float> %tmp132, float %tmp112, i32 1 @@ -141,11 +141,11 @@ main_body: %tmp138 = insertelement <4 x float> %tmp137, float %tmp131, i32 2 %tmp139 = insertelement <4 x float> %tmp138, float 0.000000e+00, i32 3 %tmp140 = call float @llvm.r600.dot4(<4 x float> %tmp135, <4 x float> %tmp139) - %tmp141 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %tmp141 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7) %tmp142 = extractelement <4 x float> %tmp141, i32 0 - %tmp143 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %tmp143 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7) %tmp144 = extractelement <4 x float> %tmp143, i32 1 - %tmp145 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %tmp145 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7) %tmp146 = extractelement <4 x float> %tmp145, i32 2 %tmp147 = insertelement <4 x float> undef, float %tmp111, i32 0 %tmp148 = insertelement <4 x float> %tmp147, float %tmp112, i32 1 @@ -156,31 +156,31 @@ main_body: %tmp153 = insertelement <4 x float> %tmp152, float %tmp146, i32 2 %tmp154 = insertelement <4 x float> %tmp153, float 0.000000e+00, i32 3 %tmp155 = call float @llvm.r600.dot4(<4 x float> %tmp150, <4 x float> %tmp154) - %tmp156 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) + %tmp156 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8) %tmp157 = extractelement <4 x float> %tmp156, i32 0 %tmp158 = fmul float %tmp157, %tmp20 - %tmp159 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) + %tmp159 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8) %tmp160 = extractelement <4 x float> %tmp159, i32 1 %tmp161 = fmul float %tmp160, %tmp21 - %tmp162 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8) + %tmp162 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 8) %tmp163 = extractelement <4 x float> %tmp162, i32 2 %tmp164 = fmul float %tmp163, %tmp22 - %tmp165 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) + %tmp165 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], 
ptr addrspace(8) null, i64 0, i32 9) %tmp166 = extractelement <4 x float> %tmp165, i32 0 %tmp167 = fmul float %tmp166, %tmp24 - %tmp168 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) + %tmp168 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9) %tmp169 = extractelement <4 x float> %tmp168, i32 1 %tmp170 = fmul float %tmp169, %tmp25 - %tmp171 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 9) + %tmp171 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 9) %tmp172 = extractelement <4 x float> %tmp171, i32 2 %tmp173 = fmul float %tmp172, %tmp26 - %tmp174 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) + %tmp174 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 10) %tmp175 = extractelement <4 x float> %tmp174, i32 0 %tmp176 = fmul float %tmp175, %tmp28 - %tmp177 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) + %tmp177 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 10) %tmp178 = extractelement <4 x float> %tmp177, i32 1 %tmp179 = fmul float %tmp178, %tmp29 - %tmp180 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 10) + %tmp180 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 10) %tmp181 = extractelement <4 x float> %tmp180, i32 2 %tmp182 = fmul float %tmp181, %tmp30 %tmp183 = fcmp uge float %tmp140, 0.000000e+00 diff --git a/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll b/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll index 2984c280b00ac7..e6c068f5c5b12d 100644 --- a/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll +++ b/llvm/test/CodeGen/AMDGPU/r600-constant-array-fixup.ll @@ -17,11 +17,11 @@ ; CHECK: Other: 0 ; CHECK: Section: .text (0x2) ; CHECK: } -define amdgpu_kernel void @test_constant_array_fixup(i32 addrspace(1)* nocapture %out, i32 %idx) #0 { +define amdgpu_kernel void @test_constant_array_fixup(ptr addrspace(1) nocapture %out, i32 %idx) #0 { entry: - %arrayidx = getelementptr inbounds [4 x i32], [4 x i32] addrspace(4)* @arr, i32 0, i32 %idx - %val = load i32, i32 addrspace(4)* %arrayidx - store i32 %val, i32 addrspace(1)* %out, align 4 + %arrayidx = getelementptr inbounds [4 x i32], ptr addrspace(4) @arr, i32 0, i32 %idx + %val = load i32, ptr addrspace(4) %arrayidx + store i32 %val, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll b/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll index 4dafe7d09e9b8b..39a9413e5dda1e 100644 --- a/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll +++ b/llvm/test/CodeGen/AMDGPU/r600-export-fix.ll @@ -47,83 +47,83 @@ main_body: %1 = extractelement <4 x float> %reg1, i32 1 %2 = extractelement <4 x float> %reg1, i32 2 %3 = extractelement <4 x float> %reg1, i32 3 - %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %4 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 
x float>], ptr addrspace(8) null, i64 0, i32 4) %5 = extractelement <4 x float> %4, i32 0 %6 = fmul float %5, %0 - %7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %7 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4) %8 = extractelement <4 x float> %7, i32 1 %9 = fmul float %8, %0 - %10 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %10 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4) %11 = extractelement <4 x float> %10, i32 2 %12 = fmul float %11, %0 - %13 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4) + %13 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 4) %14 = extractelement <4 x float> %13, i32 3 %15 = fmul float %14, %0 - %16 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %16 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5) %17 = extractelement <4 x float> %16, i32 0 %18 = fmul float %17, %1 %19 = fadd float %18, %6 - %20 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %20 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5) %21 = extractelement <4 x float> %20, i32 1 %22 = fmul float %21, %1 %23 = fadd float %22, %9 - %24 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %24 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5) %25 = extractelement <4 x float> %24, i32 2 %26 = fmul float %25, %1 %27 = fadd float %26, %12 - %28 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5) + %28 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 5) %29 = extractelement <4 x float> %28, i32 3 %30 = fmul float %29, %1 %31 = fadd float %30, %15 - %32 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %32 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6) %33 = extractelement <4 x float> %32, i32 0 %34 = fmul float %33, %2 %35 = fadd float %34, %19 - %36 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %36 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6) %37 = extractelement <4 x float> %36, i32 1 %38 = fmul float %37, %2 %39 = fadd float %38, %23 - %40 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %40 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6) %41 = extractelement <4 x float> %40, i32 2 %42 = fmul float 
%41, %2 %43 = fadd float %42, %27 - %44 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6) + %44 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 6) %45 = extractelement <4 x float> %44, i32 3 %46 = fmul float %45, %2 %47 = fadd float %46, %31 - %48 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %48 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7) %49 = extractelement <4 x float> %48, i32 0 %50 = fmul float %49, %3 %51 = fadd float %50, %35 - %52 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %52 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7) %53 = extractelement <4 x float> %52, i32 1 %54 = fmul float %53, %3 %55 = fadd float %54, %39 - %56 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %56 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7) %57 = extractelement <4 x float> %56, i32 2 %58 = fmul float %57, %3 %59 = fadd float %58, %43 - %60 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7) + %60 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 7) %61 = extractelement <4 x float> %60, i32 3 %62 = fmul float %61, %3 %63 = fadd float %62, %47 - %64 = load <4 x float>, <4 x float> addrspace(8)* null + %64 = load <4 x float>, ptr addrspace(8) null %65 = extractelement <4 x float> %64, i32 0 - %66 = load <4 x float>, <4 x float> addrspace(8)* null + %66 = load <4 x float>, ptr addrspace(8) null %67 = extractelement <4 x float> %66, i32 1 - %68 = load <4 x float>, <4 x float> addrspace(8)* null + %68 = load <4 x float>, ptr addrspace(8) null %69 = extractelement <4 x float> %68, i32 2 - %70 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %70 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2) %71 = extractelement <4 x float> %70, i32 0 - %72 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %72 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2) %73 = extractelement <4 x float> %72, i32 1 - %74 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2) + %74 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 2) %75 = extractelement <4 x float> %74, i32 2 - %76 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %76 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3) %77 = extractelement <4 x float> %76, i32 0 - %78 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 
x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %78 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3) %79 = extractelement <4 x float> %78, i32 1 - %80 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3) + %80 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 3) %81 = extractelement <4 x float> %80, i32 2 %82 = insertelement <4 x float> undef, float %51, i32 0 %83 = insertelement <4 x float> %82, float %55, i32 1 diff --git a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll index f0604c7fe7829f..f3a785db0b2b4f 100644 --- a/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/r600-legalize-umax-bug.ll @@ -2,7 +2,7 @@ ; RUN: llc < %s -march=r600 -mcpu=cypress -start-after safe-stack | FileCheck %s ; Don't crash -define amdgpu_kernel void @test(i64 addrspace(1)* %out) { +define amdgpu_kernel void @test(ptr addrspace(1) %out) { ; CHECK-LABEL: test: ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT: ALU 4, @6, KC0[CB0:0-32], KC1[] @@ -23,13 +23,13 @@ define amdgpu_kernel void @test(i64 addrspace(1)* %out) { ; CHECK-NEXT: LSHR * T1.X, T0.W, literal.y, ; CHECK-NEXT: 4(5.605194e-45), 2(2.802597e-45) bb: - store i64 2, i64 addrspace(1)* %out - %tmp = load i64, i64 addrspace(1)* %out + store i64 2, ptr addrspace(1) %out + %tmp = load i64, ptr addrspace(1) %out br label %jump jump: ; preds = %bb %tmp1 = icmp ugt i64 %tmp, 4 %umax = select i1 %tmp1, i64 %tmp, i64 4 - store i64 %umax, i64 addrspace(1)* %out + store i64 %umax, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/r600.add.ll b/llvm/test/CodeGen/AMDGPU/r600.add.ll index 73eea3ef217744..a4a7c3d86d801f 100644 --- a/llvm/test/CodeGen/AMDGPU/r600.add.ll +++ b/llvm/test/CodeGen/AMDGPU/r600.add.ll @@ -2,24 +2,24 @@ ; FUNC-LABEL: {{^}}s_add_i32: ; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { - %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 - %a = load i32, i32 addrspace(1)* %in - %b = load i32, i32 addrspace(1)* %b_ptr +define amdgpu_kernel void @s_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { + %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 + %a = load i32, ptr addrspace(1) %in + %b = load i32, ptr addrspace(1) %b_ptr %result = add i32 %a, %b - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}s_add_v2i32: ; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { - %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 - %a = load <2 x i32>, <2 x i32> addrspace(1)* %in - %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr +define amdgpu_kernel void @s_add_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { + %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1 + %a = load <2 x i32>, ptr addrspace(1) %in + %b = load <2 x i32>, ptr addrspace(1) %b_ptr %result = add <2 x i32> %a, %b - store <2 x i32> %result, <2 x i32> addrspace(1)* %out + store <2 x i32> %result, ptr addrspace(1) %out ret void } @@ -28,12 
+28,12 @@ define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> a ; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { - %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 - %a = load <4 x i32>, <4 x i32> addrspace(1)* %in - %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr +define amdgpu_kernel void @s_add_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { + %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1 + %a = load <4 x i32>, ptr addrspace(1) %in + %b = load <4 x i32>, ptr addrspace(1) %b_ptr %result = add <4 x i32> %a, %b - store <4 x i32> %result, <4 x i32> addrspace(1)* %out + store <4 x i32> %result, ptr addrspace(1) %out ret void } @@ -46,10 +46,10 @@ define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> a ; EG: ADD_INT ; EG: ADD_INT ; EG: ADD_INT -define amdgpu_kernel void @s_add_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) { +define amdgpu_kernel void @s_add_v8i32(ptr addrspace(1) %out, <8 x i32> %a, <8 x i32> %b) { entry: %0 = add <8 x i32> %a, %b - store <8 x i32> %0, <8 x i32> addrspace(1)* %out + store <8 x i32> %0, ptr addrspace(1) %out ret void } @@ -70,33 +70,33 @@ entry: ; EG: ADD_INT ; EG: ADD_INT ; EG: ADD_INT -define amdgpu_kernel void @s_add_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) { +define amdgpu_kernel void @s_add_v16i32(ptr addrspace(1) %out, <16 x i32> %a, <16 x i32> %b) { entry: %0 = add <16 x i32> %a, %b - store <16 x i32> %0, <16 x i32> addrspace(1)* %out + store <16 x i32> %0, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}v_add_i32: -define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid - %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1 - %a = load volatile i32, i32 addrspace(1)* %gep - %b = load volatile i32, i32 addrspace(1)* %b_ptr + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid + %b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1 + %a = load volatile i32, ptr addrspace(1) %gep + %b = load volatile i32, ptr addrspace(1) %b_ptr %result = add i32 %a, %b - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}v_add_imm_i32: -define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +define amdgpu_kernel void @v_add_imm_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() - %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid - %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1 - %a = load volatile i32, i32 addrspace(1)* %gep + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid + %b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1 + %a = load volatile i32, ptr addrspace(1) %gep %result = add i32 %a, 123 - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } @@ -107,10 +107,10 @@ define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1 ; EG-DAG: 
ADD_INT ; EG-DAG: ADD_INT {{[* ]*}} ; EG-NOT: SUB -define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) { +define amdgpu_kernel void @add64(ptr addrspace(1) %out, i64 %a, i64 %b) { entry: %add = add i64 %a, %b - store i64 %add, i64 addrspace(1)* %out + store i64 %add, ptr addrspace(1) %out ret void } @@ -126,11 +126,11 @@ entry: ; EG-DAG: ADD_INT ; EG-DAG: ADD_INT {{[* ]*}} ; EG-NOT: SUB -define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) { +define amdgpu_kernel void @add64_sgpr_vgpr(ptr addrspace(1) %out, i64 %a, ptr addrspace(1) %in) { entry: - %0 = load i64, i64 addrspace(1)* %in + %0 = load i64, ptr addrspace(1) %in %1 = add i64 %a, %0 - store i64 %1, i64 addrspace(1)* %out + store i64 %1, ptr addrspace(1) %out ret void } @@ -142,13 +142,13 @@ entry: ; EG-DAG: ADD_INT ; EG-DAG: ADD_INT {{[* ]*}} ; EG-NOT: SUB -define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) { +define amdgpu_kernel void @add64_in_branch(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %a, i64 %b, i64 %c) { entry: %0 = icmp eq i64 %a, 0 br i1 %0, label %if, label %else if: - %1 = load i64, i64 addrspace(1)* %in + %1 = load i64, ptr addrspace(1) %in br label %endif else: @@ -157,7 +157,7 @@ else: endif: %3 = phi i64 [%1, %if], [%2, %else] - store i64 %3, i64 addrspace(1)* %out + store i64 %3, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll b/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll index 6caa6173e7a010..acac5c321dfd67 100644 --- a/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll +++ b/llvm/test/CodeGen/AMDGPU/r600.alu-limits.ll @@ -6,10 +6,10 @@ %struct.foo = type {i32, i32, i32} -define amdgpu_kernel void @alu_limits(i32 addrspace(1)* %out, %struct.foo addrspace(5)* %in, i32 %offset) { +define amdgpu_kernel void @alu_limits(ptr addrspace(1) %out, ptr addrspace(5) %in, i32 %offset) { entry: - %ptr = getelementptr inbounds %struct.foo, %struct.foo addrspace(5)* %in, i32 1, i32 2 - %x = load i32, i32 addrspace(5)*%ptr, align 4 + %ptr = getelementptr inbounds %struct.foo, ptr addrspace(5) %in, i32 1, i32 2 + %x = load i32, ptr addrspace(5) %ptr, align 4 br label %loop loop: %i = phi i32 [ 100, %entry ], [ %nexti, %loop ] @@ -24,6 +24,6 @@ loop: br i1 %cond, label %loop, label %end end: %out_val = add i32 %nextval, 4 - store i32 %out_val, i32 addrspace(1)* %out, align 4 + store i32 %out_val, ptr addrspace(1) %out, align 4 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll b/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll index a07c0a5d542237..66e38f59f44bfd 100644 --- a/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll +++ b/llvm/test/CodeGen/AMDGPU/r600.amdgpu-alias-analysis.ll @@ -1,8 +1,8 @@ ; RUN: opt -mtriple=r600-- -passes='default,aa-eval' -print-all-alias-modref-info -disable-output < %s 2>&1 | FileCheck %s ; CHECK: MayAlias: i8 addrspace(5)* %p, i8 addrspace(999)* %p1 -define amdgpu_kernel void @test(i8 addrspace(5)* %p, i8 addrspace(999)* %p1) { - load volatile i8, i8 addrspace(5)* %p - load volatile i8, i8 addrspace(999)* %p1 +define amdgpu_kernel void @test(ptr addrspace(5) %p, ptr addrspace(999) %p1) { + load volatile i8, ptr addrspace(5) %p + load volatile i8, ptr addrspace(999) %p1 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/r600.bitcast.ll b/llvm/test/CodeGen/AMDGPU/r600.bitcast.ll index 20de997e1bceff..e7674ce6d88d92 100644 --- a/llvm/test/CodeGen/AMDGPU/r600.bitcast.ll +++ 
b/llvm/test/CodeGen/AMDGPU/r600.bitcast.ll @@ -3,7 +3,7 @@ ; This test just checks that the compiler doesn't crash. -define amdgpu_kernel void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrspace(1)* %in) { +define amdgpu_kernel void @i8ptr_v16i8ptr(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; EG-LABEL: i8ptr_v16i8ptr: ; EG: ; %bb.0: ; %entry ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] @@ -20,13 +20,12 @@ define amdgpu_kernel void @i8ptr_v16i8ptr(<16 x i8> addrspace(1)* %out, i8 addrs ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) entry: - %0 = bitcast i8 addrspace(1)* %in to <16 x i8> addrspace(1)* - %1 = load <16 x i8>, <16 x i8> addrspace(1)* %0 - store <16 x i8> %1, <16 x i8> addrspace(1)* %out + %0 = load <16 x i8>, ptr addrspace(1) %in + store <16 x i8> %0, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addrspace(1)* %in) nounwind { +define amdgpu_kernel void @f32_to_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { ; EG-LABEL: f32_to_v2i16: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] @@ -42,13 +41,13 @@ define amdgpu_kernel void @f32_to_v2i16(<2 x i16> addrspace(1)* %out, float addr ; EG-NEXT: ALU clause starting at 9: ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %load = load float, float addrspace(1)* %in, align 4 + %load = load float, ptr addrspace(1) %in, align 4 %bc = bitcast float %load to <2 x i16> - store <2 x i16> %bc, <2 x i16> addrspace(1)* %out, align 4 + store <2 x i16> %bc, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind { +define amdgpu_kernel void @v2i16_to_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { ; EG-LABEL: v2i16_to_f32: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] @@ -64,13 +63,13 @@ define amdgpu_kernel void @v2i16_to_f32(float addrspace(1)* %out, <2 x i16> addr ; EG-NEXT: ALU clause starting at 9: ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4 + %load = load <2 x i16>, ptr addrspace(1) %in, align 4 %bc = bitcast <2 x i16> %load to float - store float %bc, float addrspace(1)* %out, align 4 + store float %bc, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspace(1)* %in) nounwind { +define amdgpu_kernel void @v4i8_to_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { ; EG-LABEL: v4i8_to_i32: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] @@ -86,13 +85,13 @@ define amdgpu_kernel void @v4i8_to_i32(i32 addrspace(1)* %out, <4 x i8> addrspac ; EG-NEXT: ALU clause starting at 9: ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %load = load <4 x i8>, <4 x i8> addrspace(1)* %in, align 4 + %load = load <4 x i8>, ptr addrspace(1) %in, align 4 %bc = bitcast <4 x i8> %load to i32 - store i32 %bc, i32 addrspace(1)* %out, align 4 + store i32 %bc, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @i32_to_v4i8(<4 x i8> addrspace(1)* %out, i32 addrspace(1)* %in) nounwind { +define amdgpu_kernel void @i32_to_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { ; EG-LABEL: i32_to_v4i8: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] @@ -108,13 +107,13 @@ define amdgpu_kernel void @i32_to_v4i8(<4 x i8> 
addrspace(1)* %out, i32 addrspac ; EG-NEXT: ALU clause starting at 9: ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %load = load i32, i32 addrspace(1)* %in, align 4 + %load = load i32, ptr addrspace(1) %in, align 4 %bc = bitcast i32 %load to <4 x i8> - store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4 + store <4 x i8> %bc, ptr addrspace(1) %out, align 4 ret void } -define amdgpu_kernel void @v2i16_to_v4i8(<4 x i8> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) nounwind { +define amdgpu_kernel void @v2i16_to_v4i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { ; EG-LABEL: v2i16_to_v4i8: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] @@ -130,16 +129,16 @@ define amdgpu_kernel void @v2i16_to_v4i8(<4 x i8> addrspace(1)* %out, <2 x i16> ; EG-NEXT: ALU clause starting at 9: ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %load = load <2 x i16>, <2 x i16> addrspace(1)* %in, align 4 + %load = load <2 x i16>, ptr addrspace(1) %in, align 4 %bc = bitcast <2 x i16> %load to <4 x i8> - store <4 x i8> %bc, <4 x i8> addrspace(1)* %out, align 4 + store <4 x i8> %bc, ptr addrspace(1) %out, align 4 ret void } ; This just checks for crash in BUILD_VECTOR/EXTRACT_ELEMENT combine ; the stack manipulation is tricky to follow ; TODO: This should only use one load -define amdgpu_kernel void @v4i16_extract_i8(i8 addrspace(1)* %out, <4 x i16> addrspace(1)* %in) nounwind { +define amdgpu_kernel void @v4i16_extract_i8(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind { ; EG-LABEL: v4i16_extract_i8: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] @@ -172,14 +171,14 @@ define amdgpu_kernel void @v4i16_extract_i8(i8 addrspace(1)* %out, <4 x i16> add ; EG-NEXT: MOV * T5.Z, 0.0, ; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %load = load <4 x i16>, <4 x i16> addrspace(1)* %in, align 2 + %load = load <4 x i16>, ptr addrspace(1) %in, align 2 %bc = bitcast <4 x i16> %load to <8 x i8> %element = extractelement <8 x i8> %bc, i32 5 - store i8 %element, i8 addrspace(1)* %out + store i8 %element, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { +define amdgpu_kernel void @bitcast_v2i32_to_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) { ; EG-LABEL: bitcast_v2i32_to_f64: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] @@ -195,9 +194,9 @@ define amdgpu_kernel void @bitcast_v2i32_to_f64(double addrspace(1)* %out, <2 x ; EG-NEXT: ALU clause starting at 9: ; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, ; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) - %val = load <2 x i32>, <2 x i32> addrspace(1)* %in, align 8 + %val = load <2 x i32>, ptr addrspace(1) %in, align 8 %bc = bitcast <2 x i32> %val to double - store double %bc, double addrspace(1)* %out, align 8 + store double %bc, ptr addrspace(1) %out, align 8 ret void } diff --git a/llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll b/llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll index 71af6a9a4f510e..9f2cf98dc3ef1a 100644 --- a/llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll +++ b/llvm/test/CodeGen/AMDGPU/r600.extract-lowbits.ll @@ -16,7 +16,7 @@ ; Pattern a. 
32-bit ; ---------------------------------------------------------------------------- ; -define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) { +define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) { ; EG-LABEL: bzhi32_a0: ; EG: ; %bb.0: ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -41,11 +41,11 @@ define amdgpu_kernel void @bzhi32_a0(i32 %val, i32 %numlowbits, i32 addrspace(1) %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 %masked = and i32 %mask, %val - store i32 %masked, i32 addrspace(1)* %out + store i32 %masked, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) { +define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits, ptr addrspace(1) %out) { ; EG-LABEL: bzhi32_a1_indexzext: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[], KC1[] @@ -87,11 +87,11 @@ define amdgpu_kernel void @bzhi32_a1_indexzext(i32 %val, i8 zeroext %numlowbits, %onebit = shl i32 1, %conv %mask = add nsw i32 %onebit, -1 %masked = and i32 %mask, %val - store i32 %masked, i32 addrspace(1)* %out + store i32 %masked, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) { +define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) { ; EG-LABEL: bzhi32_a4_commutative: ; EG: ; %bb.0: ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -116,7 +116,7 @@ define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32 %onebit = shl i32 1, %numlowbits %mask = add nsw i32 %onebit, -1 %masked = and i32 %val, %mask ; swapped order - store i32 %masked, i32 addrspace(1)* %out + store i32 %masked, ptr addrspace(1) %out ret void } @@ -124,7 +124,7 @@ define amdgpu_kernel void @bzhi32_a4_commutative(i32 %val, i32 %numlowbits, i32 ; Pattern b. 
32-bit ; ---------------------------------------------------------------------------- ; -define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) { +define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) { ; EG-LABEL: bzhi32_b0: ; EG: ; %bb.0: ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -149,11 +149,11 @@ define amdgpu_kernel void @bzhi32_b0(i32 %val, i32 %numlowbits, i32 addrspace(1) %notmask = shl i32 -1, %numlowbits %mask = xor i32 %notmask, -1 %masked = and i32 %mask, %val - store i32 %masked, i32 addrspace(1)* %out + store i32 %masked, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits, i32 addrspace(1)* %out) { +define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits, ptr addrspace(1) %out) { ; EG-LABEL: bzhi32_b1_indexzext: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[], KC1[] @@ -195,11 +195,11 @@ define amdgpu_kernel void @bzhi32_b1_indexzext(i32 %val, i8 zeroext %numlowbits, %notmask = shl i32 -1, %conv %mask = xor i32 %notmask, -1 %masked = and i32 %mask, %val - store i32 %masked, i32 addrspace(1)* %out + store i32 %masked, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) { +define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) { ; EG-LABEL: bzhi32_b4_commutative: ; EG: ; %bb.0: ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -224,7 +224,7 @@ define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32 %notmask = shl i32 -1, %numlowbits %mask = xor i32 %notmask, -1 %masked = and i32 %val, %mask ; swapped order - store i32 %masked, i32 addrspace(1)* %out + store i32 %masked, ptr addrspace(1) %out ret void } @@ -232,7 +232,7 @@ define amdgpu_kernel void @bzhi32_b4_commutative(i32 %val, i32 %numlowbits, i32 ; Pattern c. 
32-bit ; ---------------------------------------------------------------------------- ; -define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) { +define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) { ; EG-LABEL: bzhi32_c0: ; EG: ; %bb.0: ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -257,11 +257,11 @@ define amdgpu_kernel void @bzhi32_c0(i32 %val, i32 %numlowbits, i32 addrspace(1) %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits %masked = and i32 %mask, %val - store i32 %masked, i32 addrspace(1)* %out + store i32 %masked, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) { +define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, ptr addrspace(1) %out) { ; EG-LABEL: bzhi32_c1_indexzext: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[], KC1[] @@ -311,11 +311,11 @@ define amdgpu_kernel void @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits, i32 add %sh_prom = zext i8 %numhighbits to i32 %mask = lshr i32 -1, %sh_prom %masked = and i32 %mask, %val - store i32 %masked, i32 addrspace(1)* %out + store i32 %masked, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) { +define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) { ; EG-LABEL: bzhi32_c4_commutative: ; EG: ; %bb.0: ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -340,7 +340,7 @@ define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32 %numhighbits = sub i32 32, %numlowbits %mask = lshr i32 -1, %numhighbits %masked = and i32 %val, %mask ; swapped order - store i32 %masked, i32 addrspace(1)* %out + store i32 %masked, ptr addrspace(1) %out ret void } @@ -348,7 +348,7 @@ define amdgpu_kernel void @bzhi32_c4_commutative(i32 %val, i32 %numlowbits, i32 ; Pattern d. 32-bit. 
; ---------------------------------------------------------------------------- ; -define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, i32 addrspace(1)* %out) { +define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, ptr addrspace(1) %out) { ; EG-LABEL: bzhi32_d0: ; EG: ; %bb.0: ; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] @@ -373,11 +373,11 @@ define amdgpu_kernel void @bzhi32_d0(i32 %val, i32 %numlowbits, i32 addrspace(1) %numhighbits = sub i32 32, %numlowbits %highbitscleared = shl i32 %val, %numhighbits %masked = lshr i32 %highbitscleared, %numhighbits - store i32 %masked, i32 addrspace(1)* %out + store i32 %masked, ptr addrspace(1) %out ret void } -define amdgpu_kernel void @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits, i32 addrspace(1)* %out) { +define amdgpu_kernel void @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits, ptr addrspace(1) %out) { ; EG-LABEL: bzhi32_d1_indexzext: ; EG: ; %bb.0: ; EG-NEXT: ALU 0, @8, KC0[], KC1[] @@ -425,6 +425,6 @@ define amdgpu_kernel void @bzhi32_d1_indexzext(i32 %val, i8 %numlowbits, i32 add %sh_prom = zext i8 %numhighbits to i32 %highbitscleared = shl i32 %val, %sh_prom %masked = lshr i32 %highbitscleared, %sh_prom - store i32 %masked, i32 addrspace(1)* %out + store i32 %masked, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll b/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll index 3d2f1b4fb9f4f5..1963574c9494db 100644 --- a/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll +++ b/llvm/test/CodeGen/AMDGPU/r600.global_atomics.ll @@ -6,20 +6,20 @@ ; FUNC-LABEL: {{^}}atomic_add_i32_offset: ; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_add_i32_offset(ptr addrspace(1) %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_add_i32_soffset: ; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_add_i32_soffset(ptr addrspace(1) %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000 - %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i64 9000 + %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst ret void } @@ -27,516 +27,516 @@ entry: ; FIXME: looks like the offset is wrong ; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_add_i32_huge_offset(ptr addrspace(1) %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595 + %gep = getelementptr i32, ptr addrspace(1) %out, i64 47224239175595 - %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst + %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_add_i32_addr64_offset: ; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 
%in, i64 %index) { +define amdgpu_kernel void @atomic_add_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 - %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_add_i32: ; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_add_i32(ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst + %val = atomicrmw volatile add ptr addrspace(1) %out, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_add_i32_addr64: ; EG: MEM_RAT ATOMIC_ADD [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_add_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %val = atomicrmw volatile add ptr addrspace(1) %ptr, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_and_i32_offset: ; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_and_i32_offset(ptr addrspace(1) %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile and ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_and_i32_addr64_offset: ; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_and_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 - %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + %val = atomicrmw volatile and ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_and_i32: ; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_and_i32(ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst + %val = atomicrmw volatile and ptr addrspace(1) %out, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_and_i32_addr64: ; EG: MEM_RAT ATOMIC_AND [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void 
@atomic_and_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %val = atomicrmw volatile and ptr addrspace(1) %ptr, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_sub_i32_offset: ; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_sub_i32_offset(ptr addrspace(1) %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_sub_i32_addr64_offset: ; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_sub_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 - %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_sub_i32: ; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_sub_i32(ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst + %val = atomicrmw volatile sub ptr addrspace(1) %out, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_sub_i32_addr64: ; EG: MEM_RAT ATOMIC_SUB [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_sub_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %val = atomicrmw volatile sub ptr addrspace(1) %ptr, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_max_i32_offset: ; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_max_i32_offset(ptr addrspace(1) %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile max ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_max_i32_addr64_offset: ; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_max_i32_addr64_offset(ptr 
addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 - %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + %val = atomicrmw volatile max ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_max_i32: ; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_max_i32(ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst + %val = atomicrmw volatile max ptr addrspace(1) %out, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_max_i32_addr64: ; EG: MEM_RAT ATOMIC_MAX_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_max_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %val = atomicrmw volatile max ptr addrspace(1) %ptr, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_umax_i32_offset: ; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_umax_i32_offset(ptr addrspace(1) %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile umax ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_umax_i32_addr64_offset: ; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_umax_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 - %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + %val = atomicrmw volatile umax ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_umax_i32: ; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_umax_i32(ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst + %val = atomicrmw volatile umax ptr addrspace(1) %out, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_umax_i32_addr64: ; EG: MEM_RAT ATOMIC_MAX_UINT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_umax_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 
%index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %val = atomicrmw volatile umax ptr addrspace(1) %ptr, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_min_i32_offset: ; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_min_i32_offset(ptr addrspace(1) %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile min ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_min_i32_addr64_offset: ; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_min_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 - %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + %val = atomicrmw volatile min ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_min_i32: ; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_min_i32(ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst + %val = atomicrmw volatile min ptr addrspace(1) %out, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_min_i32_addr64: ; EG: MEM_RAT ATOMIC_MIN_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_min_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %val = atomicrmw volatile min ptr addrspace(1) %ptr, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_umin_i32_offset: ; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_umin_i32_offset(ptr addrspace(1) %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile umin ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_umin_i32_addr64_offset: ; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_umin_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 
%index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 - %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + %val = atomicrmw volatile umin ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_umin_i32: ; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_umin_i32(ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst + %val = atomicrmw volatile umin ptr addrspace(1) %out, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_umin_i32_addr64: ; EG: MEM_RAT ATOMIC_MIN_UINT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_umin_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %val = atomicrmw volatile umin ptr addrspace(1) %ptr, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_or_i32_offset: ; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_or_i32_offset(ptr addrspace(1) %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile or ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_or_i32_addr64_offset: ; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_or_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 - %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + %val = atomicrmw volatile or ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_or_i32: ; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_or_i32(ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst + %val = atomicrmw volatile or ptr addrspace(1) %out, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_or_i32_addr64: ; EG: MEM_RAT ATOMIC_OR [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_or_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 
%index - %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %val = atomicrmw volatile or ptr addrspace(1) %ptr, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_xchg_i32_offset: ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_xchg_i32_offset(ptr addrspace(1) %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64_offset: ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 - %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + %val = atomicrmw volatile xchg ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_xchg_i32: ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_xchg_i32(ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst + %val = atomicrmw volatile xchg ptr addrspace(1) %out, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_xchg_i32_addr64: ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_xchg_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %val = atomicrmw volatile xchg ptr addrspace(1) %ptr, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_offset: ; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i32_offset(ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset: ; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(ptr 
addrspace(1) %out, i32 %in, i64 %index, i32 %old) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 - %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + %val = cmpxchg volatile ptr addrspace(1) %gep, i32 %old, i32 %in seq_cst seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32: ; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i32(ptr addrspace(1) %out, i32 %in, i32 %old) { entry: - %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst + %val = cmpxchg volatile ptr addrspace(1) %out, i32 %old, i32 %in seq_cst seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_cmpxchg_i32_addr64: ; EG: MEM_RAT ATOMIC_CMPXCHG_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) { +define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index, i32 %old) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %val = cmpxchg volatile ptr addrspace(1) %ptr, i32 %old, i32 %in seq_cst seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_xor_i32_offset: ; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_xor_i32_offset(ptr addrspace(1) %out, i32 %in) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile xor ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_xor_i32_addr64_offset: ; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_xor_i32_addr64_offset(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 - %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + %val = atomicrmw volatile xor ptr addrspace(1) %gep, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_xor_i32: ; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @atomic_xor_i32(ptr addrspace(1) %out, i32 %in) { entry: - %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst + %val = atomicrmw volatile xor ptr addrspace(1) %out, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_xor_i32_addr64: ; EG: MEM_RAT ATOMIC_XOR [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Z -define amdgpu_kernel void @atomic_xor_i32_addr64(i32 
addrspace(1)* %out, i32 %in, i64 %index) { +define amdgpu_kernel void @atomic_xor_i32_addr64(ptr addrspace(1) %out, i32 %in, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %val = atomicrmw volatile xor ptr addrspace(1) %ptr, i32 %in seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_store_i32_offset: ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y -define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) { +define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, ptr addrspace(1) %out) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4 + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + store atomic i32 %in, ptr addrspace(1) %gep seq_cst, align 4 ret void } ; FUNC-LABEL: {{^}}atomic_store_i32: ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y -define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) { +define amdgpu_kernel void @atomic_store_i32(i32 %in, ptr addrspace(1) %out) { entry: - store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4 + store atomic i32 %in, ptr addrspace(1) %out seq_cst, align 4 ret void } ; FUNC-LABEL: {{^}}atomic_store_i32_addr64_offset: ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y -define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) { +define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, ptr addrspace(1) %out, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4 - store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4 + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + %gep = getelementptr i32, ptr addrspace(1) %ptr, i64 4 + store atomic i32 %in, ptr addrspace(1) %gep seq_cst, align 4 ret void } ; FUNC-LABEL: {{^}}atomic_store_i32_addr64: ; EG: MEM_RAT ATOMIC_XCHG_INT [[REG:T[0-9]+]] ; EG: MOV{{[ *]*}}[[REG]].X, KC0[2].Y -define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) { +define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, ptr addrspace(1) %out, i64 %index) { entry: - %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index - store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4 + %ptr = getelementptr i32, ptr addrspace(1) %out, i64 %index + store atomic i32 %in, ptr addrspace(1) %ptr seq_cst, align 4 ret void } ; FUNC-LABEL: {{^}}atomic_add_1 ; EG: MEM_RAT ATOMIC_ADD -define amdgpu_kernel void @atomic_add_1(i32 addrspace(1)* %out) { +define amdgpu_kernel void @atomic_add_1(ptr addrspace(1) %out) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 1 seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 1 seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_add_neg1 ; EG: MEM_RAT ATOMIC_ADD -define amdgpu_kernel void @atomic_add_neg1(i32 addrspace(1)* %out) { +define amdgpu_kernel void @atomic_add_neg1(ptr addrspace(1) %out) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 -1 seq_cst + %gep = 
getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile add ptr addrspace(1) %gep, i32 -1 seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_sub_neg1 ; EG: MEM_RAT ATOMIC_SUB -define amdgpu_kernel void @atomic_sub_neg1(i32 addrspace(1)* %out) { +define amdgpu_kernel void @atomic_sub_neg1(ptr addrspace(1) %out) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 -1 seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 -1 seq_cst ret void } ; FUNC-LABEL: {{^}}atomic_sub_1 ; EG: MEM_RAT ATOMIC_SUB -define amdgpu_kernel void @atomic_sub_1(i32 addrspace(1)* %out) { +define amdgpu_kernel void @atomic_sub_1(ptr addrspace(1) %out) { entry: - %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4 - %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 1 seq_cst + %gep = getelementptr i32, ptr addrspace(1) %out, i64 4 + %val = atomicrmw volatile sub ptr addrspace(1) %gep, i32 1 seq_cst ret void } diff --git a/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll b/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll index 99d55feb740ea7..1650185a9f0744 100644 --- a/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/r600.private-memory.ll @@ -10,17 +10,16 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; Additional check in case the move ends up in the last slot ; R600-NOT: MOV * TO.X -define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) { +define amdgpu_kernel void @work_item_info(ptr addrspace(1) %out, i32 %in) { entry: %0 = alloca [2 x i32], addrspace(5) - %1 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 0 - %2 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 1 - store i32 0, i32 addrspace(5)* %1 - store i32 1, i32 addrspace(5)* %2 - %3 = getelementptr [2 x i32], [2 x i32] addrspace(5)* %0, i32 0, i32 %in - %4 = load i32, i32 addrspace(5)* %3 - %5 = call i32 @llvm.r600.read.tidig.x() - %6 = add i32 %4, %5 - store i32 %6, i32 addrspace(1)* %out + %1 = getelementptr [2 x i32], ptr addrspace(5) %0, i32 0, i32 1 + store i32 0, ptr addrspace(5) %0 + store i32 1, ptr addrspace(5) %1 + %2 = getelementptr [2 x i32], ptr addrspace(5) %0, i32 0, i32 %in + %3 = load i32, ptr addrspace(5) %2 + %4 = call i32 @llvm.r600.read.tidig.x() + %5 = add i32 %3, %4 + store i32 %5, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/r600.sub.ll b/llvm/test/CodeGen/AMDGPU/r600.sub.ll index 2ded4f64328d48..98eec2f08b37c8 100644 --- a/llvm/test/CodeGen/AMDGPU/r600.sub.ll +++ b/llvm/test/CodeGen/AMDGPU/r600.sub.ll @@ -3,48 +3,48 @@ declare i32 @llvm.r600.read.tidig.x() readnone ; FUNC-LABEL: {{^}}s_sub_i32: -define amdgpu_kernel void @s_sub_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) { +define amdgpu_kernel void @s_sub_i32(ptr addrspace(1) %out, i32 %a, i32 %b) { %result = sub i32 %a, %b - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}s_sub_imm_i32: -define amdgpu_kernel void @s_sub_imm_i32(i32 addrspace(1)* %out, i32 %a) { +define amdgpu_kernel void @s_sub_imm_i32(ptr addrspace(1) %out, i32 %a) { %result = sub i32 1234, %a - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}test_sub_i32: ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @test_sub_i32(i32 addrspace(1)* %out, 
i32 addrspace(1)* %in) { - %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1 - %a = load i32, i32 addrspace(1)* %in - %b = load i32, i32 addrspace(1)* %b_ptr +define amdgpu_kernel void @test_sub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { + %b_ptr = getelementptr i32, ptr addrspace(1) %in, i32 1 + %a = load i32, ptr addrspace(1) %in + %b = load i32, ptr addrspace(1) %b_ptr %result = sub i32 %a, %b - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}test_sub_imm_i32: ; EG: SUB_INT -define amdgpu_kernel void @test_sub_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { - %a = load i32, i32 addrspace(1)* %in +define amdgpu_kernel void @test_sub_imm_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { + %a = load i32, ptr addrspace(1) %in %result = sub i32 123, %a - store i32 %result, i32 addrspace(1)* %out + store i32 %result, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}test_sub_v2i32: ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { - %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 - %a = load <2 x i32>, <2 x i32> addrspace(1) * %in - %b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr +define amdgpu_kernel void @test_sub_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { + %b_ptr = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1 + %a = load <2 x i32>, ptr addrspace(1) %in + %b = load <2 x i32>, ptr addrspace(1) %b_ptr %result = sub <2 x i32> %a, %b - store <2 x i32> %result, <2 x i32> addrspace(1)* %out + store <2 x i32> %result, ptr addrspace(1) %out ret void } @@ -53,48 +53,48 @@ define amdgpu_kernel void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32 ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} ; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}} -define amdgpu_kernel void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { - %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 - %a = load <4 x i32>, <4 x i32> addrspace(1) * %in - %b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr +define amdgpu_kernel void @test_sub_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { + %b_ptr = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1 + %a = load <4 x i32>, ptr addrspace(1) %in + %b = load <4 x i32>, ptr addrspace(1) %b_ptr %result = sub <4 x i32> %a, %b - store <4 x i32> %result, <4 x i32> addrspace(1)* %out + store <4 x i32> %result, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}test_sub_i16: -define amdgpu_kernel void @test_sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { +define amdgpu_kernel void @test_sub_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) { %tid = call i32 @llvm.r600.read.tidig.x() - %gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid - %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i32 1 - %a = load volatile i16, i16 addrspace(1)* %gep - %b = load volatile i16, i16 addrspace(1)* %b_ptr + %gep = getelementptr i16, ptr addrspace(1) %in, i32 %tid + %b_ptr = getelementptr i16, ptr addrspace(1) %gep, i32 1 + %a = load volatile i16, ptr addrspace(1) %gep + %b = load volatile i16, ptr addrspace(1) %b_ptr %result = sub i16 %a, %b - store i16 %result, 
i16 addrspace(1)* %out + store i16 %result, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}test_sub_v2i16: -define amdgpu_kernel void @test_sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { +define amdgpu_kernel void @test_sub_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) { %tid = call i32 @llvm.r600.read.tidig.x() - %gep = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid - %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1 - %a = load <2 x i16>, <2 x i16> addrspace(1)* %gep - %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr + %gep = getelementptr <2 x i16>, ptr addrspace(1) %in, i32 %tid + %b_ptr = getelementptr <2 x i16>, ptr addrspace(1) %gep, i16 1 + %a = load <2 x i16>, ptr addrspace(1) %gep + %b = load <2 x i16>, ptr addrspace(1) %b_ptr %result = sub <2 x i16> %a, %b - store <2 x i16> %result, <2 x i16> addrspace(1)* %out + store <2 x i16> %result, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}test_sub_v4i16: -define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { +define amdgpu_kernel void @test_sub_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) { %tid = call i32 @llvm.r600.read.tidig.x() - %gep = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid - %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1 - %a = load <4 x i16>, <4 x i16> addrspace(1) * %gep - %b = load <4 x i16>, <4 x i16> addrspace(1) * %b_ptr + %gep = getelementptr <4 x i16>, ptr addrspace(1) %in, i32 %tid + %b_ptr = getelementptr <4 x i16>, ptr addrspace(1) %gep, i16 1 + %a = load <4 x i16>, ptr addrspace(1) %gep + %b = load <4 x i16>, ptr addrspace(1) %b_ptr %result = sub <4 x i16> %a, %b - store <4 x i16> %result, <4 x i16> addrspace(1)* %out + store <4 x i16> %result, ptr addrspace(1) %out ret void } @@ -104,9 +104,9 @@ define amdgpu_kernel void @test_sub_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16 ; EG-DAG: SUBB_UINT ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT {{[* ]*}} -define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 %b) nounwind { +define amdgpu_kernel void @s_sub_i64(ptr addrspace(1) noalias %out, i64 %a, i64 %b) nounwind { %result = sub i64 %a, %b - store i64 %result, i64 addrspace(1)* %out, align 8 + store i64 %result, ptr addrspace(1) %out, align 8 ret void } @@ -116,37 +116,37 @@ define amdgpu_kernel void @s_sub_i64(i64 addrspace(1)* noalias %out, i64 %a, i64 ; EG-DAG: SUBB_UINT ; EG-DAG: SUB_INT ; EG-DAG: SUB_INT {{[* ]*}} -define amdgpu_kernel void @v_sub_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %inA, i64 addrspace(1)* noalias %inB) nounwind { +define amdgpu_kernel void @v_sub_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) nounwind { %tid = call i32 @llvm.r600.read.tidig.x() readnone - %a_ptr = getelementptr i64, i64 addrspace(1)* %inA, i32 %tid - %b_ptr = getelementptr i64, i64 addrspace(1)* %inB, i32 %tid - %a = load i64, i64 addrspace(1)* %a_ptr - %b = load i64, i64 addrspace(1)* %b_ptr + %a_ptr = getelementptr i64, ptr addrspace(1) %inA, i32 %tid + %b_ptr = getelementptr i64, ptr addrspace(1) %inB, i32 %tid + %a = load i64, ptr addrspace(1) %a_ptr + %b = load i64, ptr addrspace(1) %b_ptr %result = sub i64 %a, %b - store i64 %result, i64 addrspace(1)* %out, align 8 + store i64 %result, ptr addrspace(1) %out, align 8 ret void } ; FUNC-LABEL: {{^}}v_test_sub_v2i64: -define amdgpu_kernel void @v_test_sub_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> 
addrspace(1)* noalias %inA, <2 x i64> addrspace(1)* noalias %inB) { +define amdgpu_kernel void @v_test_sub_v2i64(ptr addrspace(1) %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) { %tid = call i32 @llvm.r600.read.tidig.x() readnone - %a_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inA, i32 %tid - %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %inB, i32 %tid - %a = load <2 x i64>, <2 x i64> addrspace(1)* %a_ptr - %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr + %a_ptr = getelementptr <2 x i64>, ptr addrspace(1) %inA, i32 %tid + %b_ptr = getelementptr <2 x i64>, ptr addrspace(1) %inB, i32 %tid + %a = load <2 x i64>, ptr addrspace(1) %a_ptr + %b = load <2 x i64>, ptr addrspace(1) %b_ptr %result = sub <2 x i64> %a, %b - store <2 x i64> %result, <2 x i64> addrspace(1)* %out + store <2 x i64> %result, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}v_test_sub_v4i64: -define amdgpu_kernel void @v_test_sub_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* noalias %inA, <4 x i64> addrspace(1)* noalias %inB) { +define amdgpu_kernel void @v_test_sub_v4i64(ptr addrspace(1) %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) { %tid = call i32 @llvm.r600.read.tidig.x() readnone - %a_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inA, i32 %tid - %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %inB, i32 %tid - %a = load <4 x i64>, <4 x i64> addrspace(1)* %a_ptr - %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr + %a_ptr = getelementptr <4 x i64>, ptr addrspace(1) %inA, i32 %tid + %b_ptr = getelementptr <4 x i64>, ptr addrspace(1) %inB, i32 %tid + %a = load <4 x i64>, ptr addrspace(1) %a_ptr + %b = load <4 x i64>, ptr addrspace(1) %b_ptr %result = sub <4 x i64> %a, %b - store <4 x i64> %result, <4 x i64> addrspace(1)* %out + store <4 x i64> %result, ptr addrspace(1) %out ret void } diff --git a/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll index 03f0539e19be9e..323a84df4abc2e 100644 --- a/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/r600.work-item-intrinsics.ll @@ -3,59 +3,59 @@ ; FUNC-LABEL: {{^}}tgid_x: ; EG: MEM_RAT_CACHELESS STORE_RAW T1.X -define amdgpu_kernel void @tgid_x(i32 addrspace(1)* %out) { +define amdgpu_kernel void @tgid_x(ptr addrspace(1) %out) { entry: %0 = call i32 @llvm.r600.read.tgid.x() #0 - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}tgid_y: ; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X ; EG: MOV [[REG]].X, T1.Y -define amdgpu_kernel void @tgid_y(i32 addrspace(1)* %out) { +define amdgpu_kernel void @tgid_y(ptr addrspace(1) %out) { entry: %0 = call i32 @llvm.r600.read.tgid.y() #0 - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}tgid_z: ; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X ; EG: MOV [[REG]].X, T1.Z -define amdgpu_kernel void @tgid_z(i32 addrspace(1)* %out) { +define amdgpu_kernel void @tgid_z(ptr addrspace(1) %out) { entry: %0 = call i32 @llvm.r600.read.tgid.z() #0 - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}tidig_x: ; EG: MEM_RAT_CACHELESS STORE_RAW T0.X -define amdgpu_kernel void @tidig_x(i32 addrspace(1)* %out) { +define amdgpu_kernel void @tidig_x(ptr addrspace(1) %out) { entry: %0 = call i32 @llvm.r600.read.tidig.x() #0 - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr 
addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}tidig_y: ; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X ; EG: MOV [[REG]].X, T0.Y -define amdgpu_kernel void @tidig_y(i32 addrspace(1)* %out) { +define amdgpu_kernel void @tidig_y(ptr addrspace(1) %out) { entry: %0 = call i32 @llvm.r600.read.tidig.y() #0 - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}tidig_z: ; EG: MEM_RAT_CACHELESS STORE_RAW [[REG:T[0-9]+]].X ; EG: MOV [[REG]].X, T0.Z -define amdgpu_kernel void @tidig_z(i32 addrspace(1)* %out) { +define amdgpu_kernel void @tidig_z(ptr addrspace(1) %out) { entry: %0 = call i32 @llvm.r600.read.tidig.z() #0 - store i32 %0, i32 addrspace(1)* %out + store i32 %0, ptr addrspace(1) %out ret void } @@ -65,28 +65,26 @@ entry: ; EG-NOT: VTX_READ ; EG-DAG: MOV {{\*?}} [[VAL]], KC0[3].Z ; EG-DAG: LSHR {{\*? *}}[[PTR]], KC0[2].Y, literal -define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 { - %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr() - %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)* - %gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 4 - %value = load i32, i32 addrspace(7)* %gep - store i32 %value, i32 addrspace(1)* %out +define amdgpu_kernel void @test_implicit(ptr addrspace(1) %out) #1 { + %implicitarg.ptr = call noalias ptr addrspace(7) @llvm.r600.implicitarg.ptr() + %gep = getelementptr i32, ptr addrspace(7) %implicitarg.ptr, i32 4 + %value = load i32, ptr addrspace(7) %gep + store i32 %value, ptr addrspace(1) %out ret void } ; FUNC-LABEL: {{^}}test_implicit_dyn: ; 36 prepended implicit bytes + 8(out pointer + in) = 44 ; EG: VTX_READ_32 {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}}, 44, #3 -define amdgpu_kernel void @test_implicit_dyn(i32 addrspace(1)* %out, i32 %in) #1 { - %implicitarg.ptr = call noalias i8 addrspace(7)* @llvm.r600.implicitarg.ptr() - %header.ptr = bitcast i8 addrspace(7)* %implicitarg.ptr to i32 addrspace(7)* - %gep = getelementptr i32, i32 addrspace(7)* %header.ptr, i32 %in - %value = load i32, i32 addrspace(7)* %gep - store i32 %value, i32 addrspace(1)* %out +define amdgpu_kernel void @test_implicit_dyn(ptr addrspace(1) %out, i32 %in) #1 { + %implicitarg.ptr = call noalias ptr addrspace(7) @llvm.r600.implicitarg.ptr() + %gep = getelementptr i32, ptr addrspace(7) %implicitarg.ptr, i32 %in + %value = load i32, ptr addrspace(7) %gep + store i32 %value, ptr addrspace(1) %out ret void } -declare i8 addrspace(7)* @llvm.r600.implicitarg.ptr() #0 +declare ptr addrspace(7) @llvm.r600.implicitarg.ptr() #0 declare i32 @llvm.r600.read.tgid.x() #0 declare i32 @llvm.r600.read.tgid.y() #0 diff --git a/llvm/test/CodeGen/AMDGPU/r600cfg.ll b/llvm/test/CodeGen/AMDGPU/r600cfg.ll index 2996a1053da51d..f2c7fcb38716d8 100644 --- a/llvm/test/CodeGen/AMDGPU/r600cfg.ll +++ b/llvm/test/CodeGen/AMDGPU/r600cfg.ll @@ -83,7 +83,7 @@ ELSE45: ; preds = %ENDIF40 ENDIF43: ; preds = %ELSE45, %IF44 %.sink = phi i32 [ %49, %IF44 ], [ %51, %ELSE45 ] %52 = bitcast i32 %.sink to float - %53 = load <4 x float>, <4 x float> addrspace(8)* null + %53 = load <4 x float>, ptr addrspace(8) null %54 = extractelement <4 x float> %53, i32 0 %55 = bitcast float %54 to i32 br label %LOOP47 diff --git a/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll b/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll index 4ea503bf6098ed..cf2831f21feada 100644 --- a/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll +++ b/llvm/test/CodeGen/AMDGPU/sampler-resource-id.ll @@ -5,10 +5,10 @@ ; EG: MOV 
 ; EG-NEXT: LSHR
 ; EG-NEXT: 0(
-define amdgpu_kernel void @test_0(i32 %in0, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @test_0(i32 %in0, ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in0) #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -17,10 +17,10 @@ entry:
 ; EG: MOV [[VAL]], literal.x
 ; EG-NEXT: LSHR
 ; EG-NEXT: 1(
-define amdgpu_kernel void @test_1(i32 %in0, i32 %in1, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @test_1(i32 %in0, i32 %in1, ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in1) #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -29,10 +29,10 @@ entry:
 ; EG: MOV [[VAL]], literal.x
 ; EG-NEXT: LSHR
 ; EG-NEXT: 2(
-define amdgpu_kernel void @test_2(i32 %in0, i32 %in1, i32 %in2, i32 addrspace(1)* %out) {
+define amdgpu_kernel void @test_2(i32 %in0, i32 %in1, i32 %in2, ptr addrspace(1) %out) {
 entry:
   %0 = call i32 @llvm.OpenCL.sampler.get.resource.id(i32 %in2) #0
-  store i32 %0, i32 addrspace(1)* %out
+  store i32 %0, ptr addrspace(1) %out
   ret void
 }
 
@@ -43,21 +43,21 @@ attributes #0 = { readnone }
 
 !opencl.kernels = !{!0, !1, !2}
 
-!0 = !{void (i32, i32 addrspace(1)*)* @test_0, !10, !20, !30, !40, !50}
+!0 = !{ptr @test_0, !10, !20, !30, !40, !50}
 !10 = !{!"kernel_arg_addr_space", i32 0, i32 1}
 !20 = !{!"kernel_arg_access_qual", !"none", !"none"}
 !30 = !{!"kernel_arg_type", !"sampler_t", !"int*"}
 !40 = !{!"kernel_arg_base_type", !"sampler_t", !"int*"}
 !50 = !{!"kernel_arg_type_qual", !"", !""}
 
-!1 = !{void (i32, i32, i32 addrspace(1)*)* @test_1, !11, !21, !31, !41, !51}
+!1 = !{ptr @test_1, !11, !21, !31, !41, !51}
 !11 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 1}
 !21 = !{!"kernel_arg_access_qual", !"none", !"none", !"none"}
 !31 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"int*"}
 !41 = !{!"kernel_arg_base_type", !"sampler_t", !"sampler_t", !"int*"}
 !51 = !{!"kernel_arg_type_qual", !"", !"", !""}
 
-!2 = !{void (i32, i32, i32, i32 addrspace(1)*)* @test_2, !12, !22, !32, !42, !52}
+!2 = !{ptr @test_2, !12, !22, !32, !42, !52}
 !12 = !{!"kernel_arg_addr_space", i32 0, i32 0, i32 0, i32 1}
 !22 = !{!"kernel_arg_access_qual", !"none", !"none", !"none", !"none"}
 !32 = !{!"kernel_arg_type", !"sampler_t", !"sampler_t", !"sampler_t", !"int*"}
diff --git a/llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll b/llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll
index 2cdfb06268e4bc..8495486aa49165 100644
--- a/llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdivrem64.r600.ll
@@ -33,9 +33,9 @@
 ;EG: BFE_UINT
 ;EG: BFE_UINT
 ;EG: BFE_UINT
-define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %result = sdiv i64 %x, %y
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -72,31 +72,31 @@ define amdgpu_kernel void @s_test_sdiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: BFE_UINT
 ;EG: BFE_UINT
 ;EG: AND_INT {{.*}}, 1,
-define amdgpu_kernel void @s_test_srem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %result = urem i64 %x, %y
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
 ;EG-LABEL: {{^}}test_sdiv3264:
 ;EG: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_sdiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_sdiv3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = ashr i64 %x, 33
   %2 = ashr i64 %y, 33
   %result = sdiv i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
 ;EG-LABEL: {{^}}test_srem3264:
 ;EG: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_srem3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = ashr i64 %x, 33
   %2 = ashr i64 %y, 33
   %result = srem i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -106,11 +106,11 @@ define amdgpu_kernel void @test_srem3264(i64 addrspace(1)* %out, i64 %x, i64 %y)
 ;EG: FLT_TO_INT
 ;EG-NOT: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_sdiv2464(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = ashr i64 %x, 40
   %2 = ashr i64 %y, 40
   %result = sdiv i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -120,10 +120,10 @@ define amdgpu_kernel void @test_sdiv2464(i64 addrspace(1)* %out, i64 %x, i64 %y)
 ;EG: FLT_TO_INT
 ;EG-NOT: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_srem2464(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_srem2464(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = ashr i64 %x, 40
   %2 = ashr i64 %y, 40
   %result = srem i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/set-dx10.ll b/llvm/test/CodeGen/AMDGPU/set-dx10.ll
index 9a317a87540f4a..6bec5c87a36cda 100644
--- a/llvm/test/CodeGen/AMDGPU/set-dx10.ll
+++ b/llvm/test/CodeGen/AMDGPU/set-dx10.ll
@@ -8,13 +8,13 @@
 ; CHECK: LSHR
 ; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_une_select_fptosi(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp une float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fneg float %1
   %3 = fptosi float %2 to i32
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
@@ -22,11 +22,11 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_une_select_i32(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp une float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
-  store i32 %1, i32 addrspace(1)* %out
+  store i32 %1, ptr addrspace(1) %out
   ret void
 }
 
@@ -34,13 +34,13 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oeq_select_fptosi(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp oeq float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fneg float %1
   %3 = fptosi float %2 to i32
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
@@ -48,11 +48,11 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oeq_select_i32(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp oeq float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
-  store i32 %1, i32 addrspace(1)* %out
+  store i32 %1, ptr addrspace(1) %out
   ret void
 }
 
@@ -60,13 +60,13 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ogt_select_fptosi(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp ogt float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fneg float %1
   %3 = fptosi float %2 to i32
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
@@ -74,11 +74,11 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ogt_select_i32(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp ogt float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
-  store i32 %1, i32 addrspace(1)* %out
+  store i32 %1, ptr addrspace(1) %out
   ret void
 }
 
@@ -86,13 +86,13 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oge_select_fptosi(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp oge float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fneg float %1
   %3 = fptosi float %2 to i32
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
@@ -100,11 +100,11 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_oge_select_i32(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp oge float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
-  store i32 %1, i32 addrspace(1)* %out
+  store i32 %1, ptr addrspace(1) %out
   ret void
 }
 
@@ -112,13 +112,13 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ole_select_fptosi(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp ole float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fneg float %1
   %3 = fptosi float %2 to i32
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
@@ -126,11 +126,11 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_ole_select_i32(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp ole float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
-  store i32 %1, i32 addrspace(1)* %out
+  store i32 %1, ptr addrspace(1) %out
   ret void
 }
 
@@ -138,13 +138,13 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_olt_select_fptosi(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp olt float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fneg float %1
   %3 = fptosi float %2 to i32
-  store i32 %3, i32 addrspace(1)* %out
+  store i32 %3, ptr addrspace(1) %out
   ret void
 }
 
@@ -152,10 +152,10 @@ entry:
 ; CHECK: LSHR
 ; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define amdgpu_kernel void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
+define amdgpu_kernel void @fcmp_olt_select_i32(ptr addrspace(1) %out, float %in) {
 entry:
   %0 = fcmp olt float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
-  store i32 %1, i32 addrspace(1)* %out
+  store i32 %1, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll b/llvm/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll
index 7ac4e1d9fe4b08..6eafce674f2af7 100644
--- a/llvm/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/sext-in-reg-failure-r600.ll
@@ -11,10 +11,10 @@
 ; EG: LSHR {{\*?}} [[ADDR]]
 
 ; Works with the align 2 removed
-define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(<2 x i32> addrspace(1)* %out, <2 x i32> %a, <2 x i32> %b) nounwind {
+define amdgpu_kernel void @sext_in_reg_v2i1_in_v2i32_other_amount(ptr addrspace(1) %out, <2 x i32> %a, <2 x i32> %b) nounwind {
   %c = add <2 x i32> %a, %b
   %x = shl <2 x i32> %c, <i32 6, i32 6>
   %y = ashr <2 x i32> %x, <i32 7, i32 7>
-  store <2 x i32> %y, <2 x i32> addrspace(1)* %out, align 2
+  store <2 x i32> %y, ptr addrspace(1) %out, align 2
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/swizzle-export.ll b/llvm/test/CodeGen/AMDGPU/swizzle-export.ll
index 7cf380520d428d..0e7e9a0fbbc9d3 100644
--- a/llvm/test/CodeGen/AMDGPU/swizzle-export.ll
+++ b/llvm/test/CodeGen/AMDGPU/swizzle-export.ll
@@ -12,56 +12,56 @@ main_body:
   %1 = extractelement <4 x float> %reg1, i32 1
   %2 = extractelement <4 x float> %reg1, i32 2
   %3 = extractelement <4 x float> %reg1, i32 3
-  %4 = load <4 x float>, <4 x float> addrspace(8)* null
+  %4 = load <4 x float>, ptr addrspace(8) null
   %5 = extractelement <4 x float> %4, i32 1
-  %6 = load <4 x float>, <4 x float> addrspace(8)* null
+  %6 = load <4 x float>, ptr addrspace(8) null
   %7 = extractelement <4 x float> %6, i32 2
-  %8 = load <4 x float>, <4 x float> addrspace(8)* null
+  %8 = load <4 x float>, ptr addrspace(8) null
   %9 = extractelement <4 x float> %8, i32 0
   %10 = fmul float 0.000000e+00, %9
-  %11 = load <4 x float>, <4 x float> addrspace(8)* null
+  %11 = load <4 x float>, ptr addrspace(8) null
   %12 = extractelement <4 x float> %11, i32 0
   %13 = fmul float %5, %12
-  %14 = load <4 x float>, <4 x float> addrspace(8)* null
+  %14 = load <4 x float>, ptr addrspace(8) null
   %15 = extractelement <4 x float> %14, i32 0
   %16 = fmul float 0.000000e+00, %15
-  %17 = load <4 x float>, <4 x float> addrspace(8)* null
+  %17 = load <4 x float>, ptr addrspace(8) null
   %18 = extractelement <4 x float> %17, i32 0
   %19 = fmul float 0.000000e+00, %18
-  %20 = load <4 x float>, <4 x float> addrspace(8)* null
+  %20 = load <4 x float>, ptr addrspace(8) null
   %21 = extractelement <4 x float> %20, i32 0
   %22 = fmul float %7, %21
-  %23 = load <4 x float>, <4 x float> addrspace(8)* null
+  %23 = load <4 x float>, ptr addrspace(8) null
   %24 = extractelement <4 x float> %23, i32 0
   %25 = fmul float 0.000000e+00, %24
-  %26 = load <4 x float>, <4 x float> addrspace(8)* null
+  %26 = load <4 x float>, ptr addrspace(8) null
   %27 = extractelement <4 x float> %26, i32 0
   %28 = fmul float 0.000000e+00, %27
-  %29 = load <4 x float>, <4 x float> addrspace(8)* null
+  %29 = load <4 x float>, ptr addrspace(8) null
   %30 = extractelement <4 x float> %29, i32 0
   %31 = fmul float 0.000000e+00, %30
-  %32 = load <4 x float>, <4 x float> addrspace(8)* null
+  %32 = load <4 x float>, ptr addrspace(8) null
   %33 = extractelement <4 x float> %32, i32 0
   %34 = fmul float 0.000000e+00, %33
-  %35 = load <4 x float>, <4 x float> addrspace(8)* null
+  %35 = load <4 x float>, ptr addrspace(8) null
   %36 = extractelement <4 x float> %35, i32 0
   %37 = fmul float 0.000000e+00, %36
-  %38 = load <4 x float>, <4 x float> addrspace(8)* null
+  %38 = load <4 x float>, ptr addrspace(8) null
   %39 = extractelement <4 x float> %38, i32 0
   %40 = fmul float 1.000000e+00, %39
-  %41 = load <4 x float>, <4 x float> addrspace(8)* null
+  %41 = load <4 x float>, ptr addrspace(8) null
   %42 = extractelement <4 x float> %41, i32 0
   %43 = fmul float 0.000000e+00, %42
-  %44 = load <4 x float>, <4 x float> addrspace(8)* null
+  %44 = load <4 x float>, ptr addrspace(8) null
   %45 = extractelement <4 x float> %44, i32 0
   %46 = fmul float 0.000000e+00, %45
-  %47 = load <4 x float>, <4 x float> addrspace(8)* null
+  %47 = load <4 x float>, ptr addrspace(8) null
   %48 = extractelement <4 x float> %47, i32 0
   %49 = fmul float 0.000000e+00, %48
-  %50 = load <4 x float>, <4 x float> addrspace(8)* null
+  %50 = load <4 x float>, ptr addrspace(8) null
   %51 = extractelement <4 x float> %50, i32 0
   %52 = fmul float 0.000000e+00, %51
-  %53 = load <4 x float>, <4 x float> addrspace(8)* null
+  %53 = load <4 x float>, ptr addrspace(8) null
   %54 = extractelement <4 x float> %53, i32 0
   %55 = fmul float 1.000000e+00, %54
   %56 = insertelement <4 x float> undef, float %0, i32 0
@@ -102,12 +102,12 @@ main_body:
   %1 = extractelement <4 x float> %reg1, i32 1
   %2 = fadd float %0, 2.5
   %3 = fmul float %1, 3.5
-  %4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %4 = load <4 x float>, ptr addrspace(8) getelementptr ([1024 x <4 x float>], ptr addrspace(8) null, i64 0, i32 1)
   %5 = extractelement <4 x float> %4, i32 0
   %6 = call float @llvm.cos.f32(float %5)
-  %7 = load <4 x float>, <4 x float> addrspace(8)* null
+  %7 = load <4 x float>, ptr addrspace(8) null
   %8 = extractelement <4 x float> %7, i32 0
-  %9 = load <4 x float>, <4 x float> addrspace(8)* null
+  %9 = load <4 x float>, ptr addrspace(8) null
   %10 = extractelement <4 x float> %9, i32 1
   %11 = insertelement <4 x float> undef, float %2, i32 0
   %12 = insertelement <4 x float> %11, float %3, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/udivrem64.r600.ll b/llvm/test/CodeGen/AMDGPU/udivrem64.r600.ll
index 62cab48c65e99e..4cf1e64a8bd58c 100644
--- a/llvm/test/CodeGen/AMDGPU/udivrem64.r600.ll
+++ b/llvm/test/CodeGen/AMDGPU/udivrem64.r600.ll
@@ -33,9 +33,9 @@
 ;EG: BFE_UINT
 ;EG: BFE_UINT
 ;EG: BFE_UINT
-define amdgpu_kernel void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_udiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %result = udiv i64 %x, %y
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -72,31 +72,31 @@ define amdgpu_kernel void @test_udiv(i64 addrspace(1)* %out, i64 %x, i64 %y) {
 ;EG: BFE_UINT
 ;EG: BFE_UINT
 ;EG: AND_INT {{.*}}, 1,
-define amdgpu_kernel void @test_urem(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_urem(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %result = urem i64 %x, %y
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
 ;EG-LABEL: {{^}}test_udiv3264:
 ;EG: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_udiv3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_udiv3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = lshr i64 %x, 33
   %2 = lshr i64 %y, 33
   %result = udiv i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
 ;EG-LABEL: {{^}}test_urem3264:
 ;EG: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_urem3264(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = lshr i64 %x, 33
   %2 = lshr i64 %y, 33
   %result = urem i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -106,11 +106,11 @@ define amdgpu_kernel void @test_urem3264(i64 addrspace(1)* %out, i64 %x, i64 %y)
 ;EG: FLT_TO_UINT
 ;EG-NOT: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_udiv2364(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_udiv2364(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = lshr i64 %x, 41
   %2 = lshr i64 %y, 41
   %result = udiv i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
@@ -120,17 +120,17 @@ define amdgpu_kernel void @test_udiv2364(i64 addrspace(1)* %out, i64 %x, i64 %y)
 ;EG: FLT_TO_UINT
 ;EG-NOT: RECIP_UINT
 ;EG-NOT: BFE_UINT
-define amdgpu_kernel void @test_urem2364(i64 addrspace(1)* %out, i64 %x, i64 %y) {
+define amdgpu_kernel void @test_urem2364(ptr addrspace(1) %out, i64 %x, i64 %y) {
   %1 = lshr i64 %x, 41
   %2 = lshr i64 %y, 41
   %result = urem i64 %1, %2
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
 
 ;EG-LABEL: {{^}}test_udiv_k:
-define amdgpu_kernel void @test_udiv_k(i64 addrspace(1)* %out, i64 %x) {
+define amdgpu_kernel void @test_udiv_k(ptr addrspace(1) %out, i64 %x) {
   %result = udiv i64 24, %x
-  store i64 %result, i64 addrspace(1)* %out
+  store i64 %result, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll b/llvm/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll
index b7d766aa395ef8..90cc40fb6517ff 100644
--- a/llvm/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll
+++ b/llvm/test/CodeGen/AMDGPU/v1i64-kernel-arg.ll
@@ -1,15 +1,15 @@
 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck %s
 
 ; CHECK-LABEL: {{^}}kernel_arg_i64:
-define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
-  store i64 %a, i64 addrspace(1)* %out, align 8
+define amdgpu_kernel void @kernel_arg_i64(ptr addrspace(1) %out, i64 %a) nounwind {
+  store i64 %a, ptr addrspace(1) %out, align 8
   ret void
 }
 
 ; i64 arg works, v1i64 arg does not.
 ; CHECK-LABEL: {{^}}kernel_arg_v1i64:
-define amdgpu_kernel void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
-  store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
+define amdgpu_kernel void @kernel_arg_v1i64(ptr addrspace(1) %out, <1 x i64> %a) nounwind {
+  store <1 x i64> %a, ptr addrspace(1) %out, align 8
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll b/llvm/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll
index f002a1474e0230..f6a642ecccd9ea 100644
--- a/llvm/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll
+++ b/llvm/test/CodeGen/AMDGPU/vertex-fetch-encoding.ll
@@ -6,9 +6,9 @@
 
 ; EG: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
 ; CM: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00
-define amdgpu_kernel void @vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
-  %v = load i32, i32 addrspace(1)* %in
-  store i32 %v, i32 addrspace(1)* %out
+define amdgpu_kernel void @vtx_fetch32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %v = load i32, ptr addrspace(1) %in
+  store i32 %v, ptr addrspace(1) %out
   ret void
 }
 
@@ -16,9 +16,9 @@ define amdgpu_kernel void @vtx_fetch32(i32 addrspace(1)* %out, i32 addrspace(1)*
 ; EG: VTX_READ_128 T[[DST:[0-9]]].XYZW, T[[SRC:[0-9]]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[SRC]],0x40,0x0[[DST]],0x10,0x8d,0x18,0x00,0x00,0x08,0x00
 ; CM: VTX_READ_128 T[[DST:[0-9]]].XYZW, T[[SRC:[0-9]]].X, 0, #1 ; encoding: [0x40,0x01,0x0[[SRC]],0x00,0x0[[DST]],0x10,0x8d,0x18,0x00,0x00,0x00,0x00
-define amdgpu_kernel void @vtx_fetch128(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
-  %v = load <4 x i32>, <4 x i32> addrspace(1)* %in
-  store <4 x i32> %v, <4 x i32> addrspace(1)* %out
+define amdgpu_kernel void @vtx_fetch128(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %v = load <4 x i32>, ptr addrspace(1) %in
+  store <4 x i32> %v, ptr addrspace(1) %out
   ret void
 }
 
@@ -26,9 +26,9 @@ define amdgpu_kernel void @vtx_fetch128(<4 x i32> addrspace(1)* %out, <4 x i32>
 ; EG: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #3 ; encoding: [0x40,0x03,0x0[[GPR]],0x10,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x08,0x00
 ; CM: VTX_READ_32 T[[GPR:[0-9]]].X, T[[GPR]].X, 0, #3 ; encoding: [0x40,0x03,0x0[[GPR]],0x00,0x0[[GPR]],0xf0,0x5f,0x13,0x00,0x00,0x00,0x00
-define amdgpu_kernel void @vtx_fetch32_id3(i32 addrspace(1)* %out, i32 addrspace(7)* %in) {
-  %v = load i32, i32 addrspace(7)* %in
-  store i32 %v, i32 addrspace(1)* %out
+define amdgpu_kernel void @vtx_fetch32_id3(ptr addrspace(1) %out, ptr addrspace(7) %in) {
+  %v = load i32, ptr addrspace(7) %in
+  store i32 %v, ptr addrspace(1) %out
   ret void
 }
 
@@ -38,9 +38,9 @@ define amdgpu_kernel void @vtx_fetch32_id3(i32 addrspace(1)* %out, i32 addrspace
 
 @t = internal addrspace(4) constant [4 x i32] [i32 0, i32 1, i32 2, i32 3]
 
-define amdgpu_kernel void @vtx_fetch32_id2(i32 addrspace(1)* %out, i32 %in) {
-  %a = getelementptr inbounds [4 x i32], [4 x i32] addrspace(4)* @t, i32 0, i32 %in
-  %v = load i32, i32 addrspace(4)* %a
-  store i32 %v, i32 addrspace(1)* %out
+define amdgpu_kernel void @vtx_fetch32_id2(ptr addrspace(1) %out, i32 %in) {
+  %a = getelementptr inbounds [4 x i32], ptr addrspace(4) @t, i32 0, i32 %in
+  %v = load i32, ptr addrspace(4) %a
+  store i32 %v, ptr addrspace(1) %out
   ret void
 }
diff --git a/llvm/test/CodeGen/AMDGPU/xfail.r600.bitcast.ll b/llvm/test/CodeGen/AMDGPU/xfail.r600.bitcast.ll
index 88ef9fd93c8f1f..3067ac75340d55 100644
--- a/llvm/test/CodeGen/AMDGPU/xfail.r600.bitcast.ll
+++ b/llvm/test/CodeGen/AMDGPU/xfail.r600.bitcast.ll
@@ -5,16 +5,16 @@
 
 ; TODO: enable doubles
 ; FUNC-LABEL: {{^}}bitcast_f64_to_v2i32:
-define amdgpu_kernel void @bitcast_f64_to_v2i32(<2 x i32> addrspace(1)* %out, double addrspace(1)* %in) {
-  %val = load double, double addrspace(1)* %in, align 8
+define amdgpu_kernel void @bitcast_f64_to_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
+  %val = load double, ptr addrspace(1) %in, align 8
   %add = fadd double %val, 4.0
   %bc = bitcast double %add to <2 x i32>
-  store <2 x i32> %bc, <2 x i32> addrspace(1)* %out, align 8
+  store <2 x i32> %bc, ptr addrspace(1) %out, align 8
   ret void
 }
 
 ; FUNC-LABEL: {{^}}bitcast_v2i64_to_v2f64:
-define amdgpu_kernel void @bitcast_v2i64_to_v2f64(i32 %cond, <2 x double> addrspace(1)* %out, <2 x i64> %value) {
+define amdgpu_kernel void @bitcast_v2i64_to_v2f64(i32 %cond, ptr addrspace(1) %out, <2 x i64> %value) {
 entry:
   %cmp0 = icmp eq i32 %cond, 0
   br i1 %cmp0, label %if, label %end
@@ -25,12 +25,12 @@ if:
 
 end:
   %phi = phi <2 x double> [zeroinitializer, %entry], [%cast, %if]
-  store <2 x double> %phi, <2 x double> addrspace(1)* %out
+  store <2 x double> %phi, ptr addrspace(1) %out
   ret void
 }
 
 ; FUNC-LABEL: {{^}}bitcast_v2f64_to_v2i64:
-define amdgpu_kernel void @bitcast_v2f64_to_v2i64(i32 %cond, <2 x i64> addrspace(1)* %out, <2 x double> %value) {
+define amdgpu_kernel void @bitcast_v2f64_to_v2i64(i32 %cond, ptr addrspace(1) %out, <2 x double> %value) {
 entry:
   %cmp0 = icmp eq i32 %cond, 0
   br i1 %cmp0, label %if, label %end
@@ -41,6 +41,6 @@ if:
 
 end:
   %phi = phi <2 x i64> [zeroinitializer, %entry], [%cast, %if]
-  store <2 x i64> %phi, <2 x i64> addrspace(1)* %out
+  store <2 x i64> %phi, ptr addrspace(1) %out
   ret void
 }