| @@ -0,0 +1,31 @@ | ||
| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 | ||
| // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip \ | ||
| // RUN: -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \ | ||
| // RUN: -o - | FileCheck %s | ||
|
|
||
| #define __device__ __attribute__((device)) | ||
| typedef __attribute__((address_space(3))) float *LP; | ||
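| // The builtin takes an LDS pointer, so the generic (addrspace(4)) pointer argument | ||
| // is cast to addrspace(3) before the call to llvm.amdgcn.ds.fadd.f32, as the CHECK | ||
| // lines below show. | ||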
|
|
||
| // CHECK-LABEL: define spir_func void @_Z22test_ds_atomic_add_f32Pff( | ||
| // CHECK-SAME: ptr addrspace(4) noundef [[ADDR:%.*]], float noundef [[VAL:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] { | ||
| // CHECK-NEXT: entry: | ||
| // CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr addrspace(4), align 8 | ||
| // CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca float, align 4 | ||
| // CHECK-NEXT: [[RTN:%.*]] = alloca ptr addrspace(4), align 8 | ||
| // CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[ADDR_ADDR]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr [[VAL_ADDR]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[RTN_ASCAST:%.*]] = addrspacecast ptr [[RTN]] to ptr addrspace(4) | ||
| // CHECK-NEXT: store ptr addrspace(4) [[ADDR]], ptr addrspace(4) [[ADDR_ADDR_ASCAST]], align 8 | ||
| // CHECK-NEXT: store float [[VAL]], ptr addrspace(4) [[VAL_ADDR_ASCAST]], align 4 | ||
| // CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ADDR_ADDR_ASCAST]], align 8 | ||
| // CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr addrspace(3) | ||
| // CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[VAL_ADDR_ASCAST]], align 4 | ||
| // CHECK-NEXT: [[TMP3:%.*]] = call contract addrspace(4) float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) [[TMP1]], float [[TMP2]], i32 0, i32 0, i1 false) | ||
| // CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[RTN_ASCAST]], align 8 | ||
| // CHECK-NEXT: store float [[TMP3]], ptr addrspace(4) [[TMP4]], align 4 | ||
| // CHECK-NEXT: ret void | ||
| // | ||
| __device__ void test_ds_atomic_add_f32(float *addr, float val) { | ||
| float *rtn; | ||
| *rtn = __builtin_amdgcn_ds_faddf((LP)addr, val, 0, 0, 0); | ||
| } |
| @@ -0,0 +1,129 @@ | ||
| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py | ||
| // REQUIRES: amdgpu-registered-target | ||
| // REQUIRES: x86-registered-target | ||
|
|
||
| // RUN: %clang_cc1 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "spirv64-amd-amdhsa" \ | ||
| // RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -emit-llvm -o - %s | FileCheck %s | ||
|
|
||
| #include "Inputs/cuda.h" | ||
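| // The checks below verify that __bf16 arguments, loads, return values, calls, and | ||
| // ext_vector_type values all lower to the LLVM bfloat type (and <N x bfloat> | ||
| // vectors) on spirv64-amd-amdhsa. | ||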
|
|
||
| // CHECK-LABEL: @_Z8test_argPDF16bDF16b( | ||
| // CHECK-NEXT: entry: | ||
| // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8 | ||
| // CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2 | ||
| // CHECK-NEXT: [[BF16:%.*]] = alloca bfloat, align 2 | ||
| // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr [[IN_ADDR]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[BF16_ASCAST:%.*]] = addrspacecast ptr [[BF16]] to ptr addrspace(4) | ||
| // CHECK-NEXT: store ptr addrspace(4) [[OUT:%.*]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8 | ||
| // CHECK-NEXT: store bfloat [[IN:%.*]], ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2 | ||
| // CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2 | ||
| // CHECK-NEXT: store bfloat [[TMP0]], ptr addrspace(4) [[BF16_ASCAST]], align 2 | ||
| // CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr addrspace(4) [[BF16_ASCAST]], align 2 | ||
| // CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8 | ||
| // CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(4) [[TMP2]], align 2 | ||
| // CHECK-NEXT: ret void | ||
| // | ||
| __device__ void test_arg(__bf16 *out, __bf16 in) { | ||
| __bf16 bf16 = in; | ||
| *out = bf16; | ||
| } | ||
|
|
||
| // CHECK-LABEL: @_Z9test_loadPDF16bS_( | ||
| // CHECK-NEXT: entry: | ||
| // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8 | ||
| // CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(4), align 8 | ||
| // CHECK-NEXT: [[BF16:%.*]] = alloca bfloat, align 2 | ||
| // CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr [[IN_ADDR]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[BF16_ASCAST:%.*]] = addrspacecast ptr [[BF16]] to ptr addrspace(4) | ||
| // CHECK-NEXT: store ptr addrspace(4) [[OUT:%.*]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8 | ||
| // CHECK-NEXT: store ptr addrspace(4) [[IN:%.*]], ptr addrspace(4) [[IN_ADDR_ASCAST]], align 8 | ||
| // CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[IN_ADDR_ASCAST]], align 8 | ||
| // CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr addrspace(4) [[TMP0]], align 2 | ||
| // CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(4) [[BF16_ASCAST]], align 2 | ||
| // CHECK-NEXT: [[TMP2:%.*]] = load bfloat, ptr addrspace(4) [[BF16_ASCAST]], align 2 | ||
| // CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8 | ||
| // CHECK-NEXT: store bfloat [[TMP2]], ptr addrspace(4) [[TMP3]], align 2 | ||
| // CHECK-NEXT: ret void | ||
| // | ||
| __device__ void test_load(__bf16 *out, __bf16 *in) { | ||
| __bf16 bf16 = *in; | ||
| *out = bf16; | ||
| } | ||
|
|
||
| // CHECK-LABEL: @_Z8test_retDF16b( | ||
| // CHECK-NEXT: entry: | ||
| // CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2 | ||
| // CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2 | ||
| // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr [[IN_ADDR]] to ptr addrspace(4) | ||
| // CHECK-NEXT: store bfloat [[IN:%.*]], ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2 | ||
| // CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2 | ||
| // CHECK-NEXT: ret bfloat [[TMP0]] | ||
| // | ||
| __device__ __bf16 test_ret( __bf16 in) { | ||
| return in; | ||
| } | ||
|
|
||
| // CHECK-LABEL: @_Z9test_callDF16b( | ||
| // CHECK-NEXT: entry: | ||
| // CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2 | ||
| // CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2 | ||
| // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr [[IN_ADDR]] to ptr addrspace(4) | ||
| // CHECK-NEXT: store bfloat [[IN:%.*]], ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2 | ||
| // CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2 | ||
| // CHECK-NEXT: [[CALL:%.*]] = call contract spir_func noundef addrspace(4) bfloat @_Z8test_retDF16b(bfloat noundef [[TMP0]]) #[[ATTR1:[0-9]+]] | ||
| // CHECK-NEXT: ret bfloat [[CALL]] | ||
| // | ||
| __device__ __bf16 test_call( __bf16 in) { | ||
| return test_ret(in); | ||
| } | ||
|
|
||
|
|
||
| // CHECK-LABEL: @_Z15test_vec_assignv( | ||
| // CHECK-NEXT: entry: | ||
| // CHECK-NEXT: [[VEC2_A:%.*]] = alloca <2 x bfloat>, align 4 | ||
| // CHECK-NEXT: [[VEC2_B:%.*]] = alloca <2 x bfloat>, align 4 | ||
| // CHECK-NEXT: [[VEC4_A:%.*]] = alloca <4 x bfloat>, align 8 | ||
| // CHECK-NEXT: [[VEC4_B:%.*]] = alloca <4 x bfloat>, align 8 | ||
| // CHECK-NEXT: [[VEC8_A:%.*]] = alloca <8 x bfloat>, align 16 | ||
| // CHECK-NEXT: [[VEC8_B:%.*]] = alloca <8 x bfloat>, align 16 | ||
| // CHECK-NEXT: [[VEC16_A:%.*]] = alloca <16 x bfloat>, align 32 | ||
| // CHECK-NEXT: [[VEC16_B:%.*]] = alloca <16 x bfloat>, align 32 | ||
| // CHECK-NEXT: [[VEC2_A_ASCAST:%.*]] = addrspacecast ptr [[VEC2_A]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[VEC2_B_ASCAST:%.*]] = addrspacecast ptr [[VEC2_B]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[VEC4_A_ASCAST:%.*]] = addrspacecast ptr [[VEC4_A]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[VEC4_B_ASCAST:%.*]] = addrspacecast ptr [[VEC4_B]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[VEC8_A_ASCAST:%.*]] = addrspacecast ptr [[VEC8_A]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[VEC8_B_ASCAST:%.*]] = addrspacecast ptr [[VEC8_B]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[VEC16_A_ASCAST:%.*]] = addrspacecast ptr [[VEC16_A]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[VEC16_B_ASCAST:%.*]] = addrspacecast ptr [[VEC16_B]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[TMP0:%.*]] = load <2 x bfloat>, ptr addrspace(4) [[VEC2_B_ASCAST]], align 4 | ||
| // CHECK-NEXT: store <2 x bfloat> [[TMP0]], ptr addrspace(4) [[VEC2_A_ASCAST]], align 4 | ||
| // CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr addrspace(4) [[VEC4_B_ASCAST]], align 8 | ||
| // CHECK-NEXT: store <4 x bfloat> [[TMP1]], ptr addrspace(4) [[VEC4_A_ASCAST]], align 8 | ||
| // CHECK-NEXT: [[TMP2:%.*]] = load <8 x bfloat>, ptr addrspace(4) [[VEC8_B_ASCAST]], align 16 | ||
| // CHECK-NEXT: store <8 x bfloat> [[TMP2]], ptr addrspace(4) [[VEC8_A_ASCAST]], align 16 | ||
| // CHECK-NEXT: [[TMP3:%.*]] = load <16 x bfloat>, ptr addrspace(4) [[VEC16_B_ASCAST]], align 32 | ||
| // CHECK-NEXT: store <16 x bfloat> [[TMP3]], ptr addrspace(4) [[VEC16_A_ASCAST]], align 32 | ||
| // CHECK-NEXT: ret void | ||
| // | ||
| __device__ void test_vec_assign() { | ||
| typedef __attribute__((ext_vector_type(2))) __bf16 bf16_x2; | ||
| bf16_x2 vec2_a, vec2_b; | ||
| vec2_a = vec2_b; | ||
|
|
||
| typedef __attribute__((ext_vector_type(4))) __bf16 bf16_x4; | ||
| bf16_x4 vec4_a, vec4_b; | ||
| vec4_a = vec4_b; | ||
|
|
||
| typedef __attribute__((ext_vector_type(8))) __bf16 bf16_x8; | ||
| bf16_x8 vec8_a, vec8_b; | ||
| vec8_a = vec8_b; | ||
|
|
||
| typedef __attribute__((ext_vector_type(16))) __bf16 bf16_x16; | ||
| bf16_x16 vec16_a, vec16_b; | ||
| vec16_a = vec16_b; | ||
| } |
| @@ -0,0 +1,38 @@ | ||
| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 | ||
| // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s | ||
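| // Verifies that arithmetic on _Float16 values lowers directly to half-precision | ||
| // fadd/fsub/fmul/fdiv operations. | ||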
|
|
||
| // CHECK-LABEL: define spir_func void @_Z1fv( | ||
| // CHECK-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] { | ||
| // CHECK-NEXT: entry: | ||
| // CHECK-NEXT: [[X:%.*]] = alloca half, align 2 | ||
| // CHECK-NEXT: [[Y:%.*]] = alloca half, align 2 | ||
| // CHECK-NEXT: [[Z:%.*]] = alloca half, align 2 | ||
| // CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[X]], align 2 | ||
| // CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[Y]], align 2 | ||
| // CHECK-NEXT: [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]] | ||
| // CHECK-NEXT: store half [[ADD]], ptr [[Z]], align 2 | ||
| // CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[X]], align 2 | ||
| // CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[Y]], align 2 | ||
| // CHECK-NEXT: [[SUB:%.*]] = fsub half [[TMP2]], [[TMP3]] | ||
| // CHECK-NEXT: store half [[SUB]], ptr [[Z]], align 2 | ||
| // CHECK-NEXT: [[TMP4:%.*]] = load half, ptr [[X]], align 2 | ||
| // CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[Y]], align 2 | ||
| // CHECK-NEXT: [[MUL:%.*]] = fmul half [[TMP4]], [[TMP5]] | ||
| // CHECK-NEXT: store half [[MUL]], ptr [[Z]], align 2 | ||
| // CHECK-NEXT: [[TMP6:%.*]] = load half, ptr [[X]], align 2 | ||
| // CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[Y]], align 2 | ||
| // CHECK-NEXT: [[DIV:%.*]] = fdiv half [[TMP6]], [[TMP7]] | ||
| // CHECK-NEXT: store half [[DIV]], ptr [[Z]], align 2 | ||
| // CHECK-NEXT: ret void | ||
| // | ||
| void f() { | ||
| _Float16 x, y, z; | ||
|
|
||
| z = x + y; | ||
|
|
||
| z = x - y; | ||
|
|
||
| z = x * y; | ||
|
|
||
| z = x / y; | ||
| } |
| @@ -0,0 +1,27 @@ | ||
| // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 | ||
| // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -aux-triple x86_64-pc-windows-msvc -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s | ||
|
|
||
| // Unlike OpenCL, HIP depends on the C++ interpretation of "unsigned long", which | ||
| // is 64 bits wide on Linux and 32 bits wide on Windows. The return type of the | ||
| // ballot intrinsic needs to be a 64-bit integer on both platforms. This test | ||
| // cross-compiles to Windows to confirm that the return type is indeed 64 bits | ||
| // on Windows. | ||
|
|
||
| #define __device__ __attribute__((device)) | ||
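| // The CHECK lines below confirm that foo still returns i64 and calls | ||
| // llvm.amdgcn.ballot.i64 even though the aux-triple is Windows, where | ||
| // unsigned long is only 32 bits wide. | ||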
|
|
||
| // CHECK-LABEL: define spir_func noundef i64 @_Z3fooi( | ||
| // CHECK-SAME: i32 noundef [[P:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] { | ||
| // CHECK-NEXT: entry: | ||
| // CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8 | ||
| // CHECK-NEXT: [[P_ADDR:%.*]] = alloca i32, align 4 | ||
| // CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4) | ||
| // CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr [[P_ADDR]] to ptr addrspace(4) | ||
| // CHECK-NEXT: store i32 [[P]], ptr addrspace(4) [[P_ADDR_ASCAST]], align 4 | ||
| // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[P_ADDR_ASCAST]], align 4 | ||
| // CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0 | ||
| // CHECK-NEXT: [[TMP1:%.*]] = call addrspace(4) i64 @llvm.amdgcn.ballot.i64(i1 [[TOBOOL]]) | ||
| // CHECK-NEXT: ret i64 [[TMP1]] | ||
| // | ||
| __device__ unsigned long long foo(int p) { | ||
| return __builtin_amdgcn_ballot_w64(p); | ||
| } |
| @@ -0,0 +1,46 @@ | ||
| // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip -fcuda-is-device -emit-llvm %s \ | ||
| // RUN: -o - | FileCheck %s | ||
|
|
||
| constexpr static int OpCtrl() | ||
| { | ||
| return 15 + 1; | ||
| } | ||
|
|
||
| constexpr static int RowMask() | ||
| { | ||
| return 3 + 1; | ||
| } | ||
|
|
||
| constexpr static int BankMask() | ||
| { | ||
| return 2 + 1; | ||
| } | ||
|
|
||
| constexpr static bool BountCtrl() | ||
| { | ||
| return true & false; | ||
| } | ||
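| // Each helper above is a constant expression; the CHECK lines verify that its value | ||
| // is folded into the corresponding immediate operand of llvm.amdgcn.update.dpp.i32. | ||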
|
|
||
| // CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 16, i32 0, i32 0, i1 false) | ||
| __attribute__((global)) void test_update_dpp_const_fold_imm_operand_2(int* out, int a, int b) | ||
| { | ||
| *out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false); | ||
| } | ||
|
|
||
| // CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 4, i32 0, i1 false) | ||
| __attribute__((global)) void test_update_dpp_const_fold_imm_operand_3(int* out, int a, int b) | ||
| { | ||
| *out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false); | ||
| } | ||
|
|
||
| // CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 3, i1 false) | ||
| __attribute__((global)) void test_update_dpp_const_fold_imm_operand_4(int* out, int a, int b) | ||
| { | ||
| *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false); | ||
| } | ||
|
|
||
| // CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 0, i1 false) | ||
| __attribute__((global)) void test_update_dpp_const_fold_imm_operand_5(int* out, int a, int b) | ||
| { | ||
| *out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BountCtrl()); | ||
| } |
| @@ -0,0 +1,15 @@ | ||
| // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s | ||
|
|
||
| #define __device__ __attribute__((device)) | ||
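| // d0 widens _Float16 to float (fpext); d1 narrows float to _Float16 (fptrunc). | ||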
|
|
||
| // CHECK-LABEL: @_Z2d0DF16_ | ||
| // CHECK: fpext | ||
| __device__ float d0(_Float16 x) { | ||
| return x; | ||
| } | ||
|
|
||
| // CHECK-LABEL: @_Z2d1f | ||
| // CHECK: fptrunc | ||
| __device__ _Float16 d1(float x) { | ||
| return x; | ||
| } |
| @@ -1,15 +1,16 @@ | ||
| // REQUIRES: amdgpu-registered-target | ||
| // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s | FileCheck %s | ||
| // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s | ||
|
|
||
| // CHECK-LABEL: @use_flat_scratch_name | ||
| kernel void use_flat_scratch_name() | ||
| { | ||
| // CHECK: tail call{{.*}} void asm sideeffect "s_mov_b64 flat_scratch, 0", "~{flat_scratch}"() | ||
| __asm__ volatile("s_mov_b64 flat_scratch, 0" : : : "flat_scratch"); | ||
|
|
||
| // CHECK: tail call{{.*}} void asm sideeffect "s_mov_b32 flat_scratch_lo, 0", "~{flat_scratch_lo}"() | ||
| __asm__ volatile("s_mov_b32 flat_scratch_lo, 0" : : : "flat_scratch_lo"); | ||
|
|
||
| // CHECK: tail call{{.*}} void asm sideeffect "s_mov_b32 flat_scratch_hi, 0", "~{flat_scratch_hi}"() | ||
| __asm__ volatile("s_mov_b32 flat_scratch_hi, 0" : : : "flat_scratch_hi"); | ||
| } |
| @@ -0,0 +1,6 @@ | ||
| // RUN: %clang -### --target=armv7-unknown-none-eabi -mcpu=cortex-m4 --sysroot= -fuse-ld=ld %s 2>&1 | FileCheck --check-prefix=NOLTO %s | ||
| // NOLTO: {{".*ld.*"}} {{.*}} | ||
| // NOLTO-NOT: "-plugin-opt=mcpu" | ||
|
|
||
| // RUN: %clang -### --target=armv7-unknown-none-eabi -mcpu=cortex-m4 --sysroot= -fuse-ld=ld -flto -O3 %s 2>&1 | FileCheck --check-prefix=LTO %s | ||
| // LTO: {{".*ld.*"}} {{.*}} "-plugin-opt=mcpu=cortex-m4" "-plugin-opt=O3" |
| @@ -1,2 +1,2 @@ | ||
| // RUN: touch %t.o | ||
| // RUN: %clang -Werror -Wno-msvc-not-found --param ssp-buffer-size=1 %t.o -### |
| @@ -0,0 +1,112 @@ | ||
| // Testing that changing a declaration in an unused module file won't change | ||
| // the BMI of the current module file. | ||
| // | ||
| // RUN: rm -rf %t | ||
| // RUN: split-file %s %t | ||
| // | ||
| // RUN: %clang_cc1 -std=c++20 %t/m-partA.cppm -emit-reduced-module-interface -o %t/m-partA.pcm | ||
| // RUN: %clang_cc1 -std=c++20 %t/m-partA.v1.cppm -emit-reduced-module-interface -o \ | ||
| // RUN: %t/m-partA.v1.pcm | ||
| // RUN: %clang_cc1 -std=c++20 %t/m-partB.cppm -emit-reduced-module-interface -o %t/m-partB.pcm | ||
| // RUN: %clang_cc1 -std=c++20 %t/m.cppm -emit-reduced-module-interface -o %t/m.pcm \ | ||
| // RUN: -fmodule-file=m:partA=%t/m-partA.pcm -fmodule-file=m:partB=%t/m-partB.pcm | ||
| // RUN: %clang_cc1 -std=c++20 %t/m.cppm -emit-reduced-module-interface -o %t/m.v1.pcm \ | ||
| // RUN: -fmodule-file=m:partA=%t/m-partA.v1.pcm -fmodule-file=m:partB=%t/m-partB.pcm | ||
| // | ||
| // RUN: %clang_cc1 -std=c++20 %t/useBOnly.cppm -emit-reduced-module-interface -o %t/useBOnly.pcm \ | ||
| // RUN: -fmodule-file=m=%t/m.pcm -fmodule-file=m:partA=%t/m-partA.pcm \ | ||
| // RUN: -fmodule-file=m:partB=%t/m-partB.pcm | ||
| // RUN: %clang_cc1 -std=c++20 %t/useBOnly.cppm -emit-reduced-module-interface -o %t/useBOnly.v1.pcm \ | ||
| // RUN: -fmodule-file=m=%t/m.v1.pcm -fmodule-file=m:partA=%t/m-partA.v1.pcm \ | ||
| // RUN: -fmodule-file=m:partB=%t/m-partB.pcm | ||
| // Since useBOnly only uses partB from module m, the change in partA shouldn't | ||
| // affect useBOnly. | ||
| // RUN: diff %t/useBOnly.pcm %t/useBOnly.v1.pcm &> /dev/null | ||
|
|
||
| //--- m-partA.cppm | ||
| export module m:partA; | ||
|
|
||
| namespace A_Impl { | ||
| inline int getAImpl() { | ||
| return 43; | ||
| } | ||
|
|
||
| inline int getA2Impl() { | ||
| return 43; | ||
| } | ||
| } | ||
|
|
||
| namespace A { | ||
| using A_Impl::getAImpl; | ||
| } | ||
|
|
||
| export inline int getA() { | ||
| return 43; | ||
| } | ||
|
|
||
| export inline int getA2(int) { | ||
| return 88; | ||
| } | ||
|
|
||
| //--- m-partA.v1.cppm | ||
| export module m:partA; | ||
|
|
||
| namespace A_Impl { | ||
| inline int getAImpl() { | ||
| return 43; | ||
| } | ||
|
|
||
| inline int getA2Impl() { | ||
| return 43; | ||
| } | ||
| } | ||
|
|
||
| namespace A { | ||
| using A_Impl::getAImpl; | ||
| // Adding a new declaration without introducing a new declaration name. | ||
| using A_Impl::getA2Impl; | ||
| } | ||
|
|
||
| inline int getA() { | ||
| return 43; | ||
| } | ||
|
|
||
| inline int getA2(int) { | ||
| return 88; | ||
| } | ||
|
|
||
| // Now we add a new declaration without introducing a new identifier or new types. | ||
| // A consuming module that doesn't actually use m:partA is expected to remain | ||
| // unchanged. | ||
| inline int getA(int) { | ||
| return 88; | ||
| } | ||
|
|
||
| //--- m-partB.cppm | ||
| export module m:partB; | ||
|
|
||
| export inline int getB() { | ||
| return 430; | ||
| } | ||
|
|
||
| //--- m.cppm | ||
| export module m; | ||
| export import :partA; | ||
| export import :partB; | ||
|
|
||
| //--- useBOnly.cppm | ||
| export module useBOnly; | ||
| import m; | ||
|
|
||
| export inline int get() { | ||
| return getB(); | ||
| } | ||
|
|
||
| //--- useAOnly.cppm | ||
| export module useAOnly; | ||
| import m; | ||
|
|
||
| export inline int get() { | ||
| A<int> a; | ||
| return a.getValue(); | ||
| } |
| @@ -1,11 +1,14 @@ | ||
| // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -E %s -o - | FileCheck %s | ||
| // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -E %s -o - | FileCheck %s --check-prefix=SPIRV-AMDGCN | ||
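| // Both targets expose __builtin_amdgcn_s_memtime, but __builtin_amdgcn_mov_dpp8 is | ||
| // a gfx10+ builtin: it is unavailable on gfx906 (CHECK-NOT) while the generic | ||
| // spirv64-amd-amdhsa target reports it as available (SPIRV-AMDGCN). | ||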
|
|
||
| // CHECK: has_s_memtime_inst | ||
| // SPIRV-AMDGCN: has_s_memtime_inst | ||
| #if __has_builtin(__builtin_amdgcn_s_memtime) | ||
| int has_s_memtime_inst; | ||
| #endif | ||
|
|
||
| // CHECK-NOT: has_gfx10_inst | ||
| // SPIRV-AMDGCN: has_gfx10_inst | ||
| #if __has_builtin(__builtin_amdgcn_mov_dpp8) | ||
| int has_gfx10_inst; | ||
| #endif |
| @@ -0,0 +1,25 @@ | ||
| // RUN: %clang_cc1 %s -x hip -fcuda-is-device -o - \ | ||
| // RUN: -triple=spirv64-amd-amdhsa -fsyntax-only \ | ||
| // RUN: -verify=dev | ||
| // RUN: %clang_cc1 %s -x hip -triple x86_64 -o - \ | ||
| // RUN: -aux-triple spirv64-amd-amdhsa -fsyntax-only \ | ||
| // RUN: -verify=host | ||
|
|
||
| // dev-no-diagnostics | ||
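| // On the device compile these builtins are usable, so no diagnostics are expected. | ||
| // On the host compile (x86_64 with a spirv64-amd-amdhsa aux-triple) each call is | ||
| // diagnosed as a reference to a __device__ function from a __host__ function. | ||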
|
|
||
| void test_host() { | ||
| __UINT32_TYPE__ val32; | ||
| __UINT64_TYPE__ val64; | ||
|
|
||
| // host-error@+1 {{reference to __device__ function '__builtin_amdgcn_atomic_inc32' in __host__ function}} | ||
| val32 = __builtin_amdgcn_atomic_inc32(&val32, val32, __ATOMIC_SEQ_CST, ""); | ||
|
|
||
| // host-error@+1 {{reference to __device__ function '__builtin_amdgcn_atomic_inc64' in __host__ function}} | ||
| val64 = __builtin_amdgcn_atomic_inc64(&val64, val64, __ATOMIC_SEQ_CST, ""); | ||
|
|
||
| // host-error@+1 {{reference to __device__ function '__builtin_amdgcn_atomic_dec32' in __host__ function}} | ||
| val32 = __builtin_amdgcn_atomic_dec32(&val32, val32, __ATOMIC_SEQ_CST, ""); | ||
|
|
||
| // host-error@+1 {{reference to __device__ function '__builtin_amdgcn_atomic_dec64' in __host__ function}} | ||
| val64 = __builtin_amdgcn_atomic_dec64(&val64, val64, __ATOMIC_SEQ_CST, ""); | ||
| } |
| @@ -0,0 +1,111 @@ | ||
| // REQUIRES: amdgpu-registered-target | ||
| // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fsyntax-only -verify %s | ||
|
|
||
| #pragma OPENCL EXTENSION cl_khr_fp64 : enable | ||
|
|
||
| kernel void test () { | ||
|
|
||
| int sgpr = 0, vgpr = 0, imm = 0; | ||
|
|
||
| // sgpr constraints | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "s" (imm) : ); | ||
|
|
||
| __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}" (imm) : ); | ||
| __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exe" (imm) : ); // expected-error {{invalid input constraint '{exe' in asm}} | ||
| __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec" (imm) : ); // expected-error {{invalid input constraint '{exec' in asm}} | ||
| __asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}a" (imm) : ); // expected-error {{invalid input constraint '{exec}a' in asm}} | ||
|
|
||
| // vgpr constraints | ||
| __asm__ ("v_mov_b32 %0, %1" : "=v" (vgpr) : "v" (imm) : ); | ||
|
|
||
| // 'I' constraint (an immediate integer in the range -16 to 64) | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (imm) : ); | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (-16) : ); | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (64) : ); | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (-17) : ); // expected-error {{value '-17' out of range for constraint 'I'}} | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (65) : ); // expected-error {{value '65' out of range for constraint 'I'}} | ||
|
|
||
| // 'J' constraint (an immediate 16-bit signed integer) | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (imm) : ); | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (-32768) : ); | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (32767) : ); | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (-32769) : ); // expected-error {{value '-32769' out of range for constraint 'J'}} | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (32768) : ); // expected-error {{value '32768' out of range for constraint 'J'}} | ||
|
|
||
| // 'A' constraint (an immediate constant that can be inlined) | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "A" (imm) : ); | ||
|
|
||
| // 'B' constraint (an immediate 32-bit signed integer) | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "B" (imm) : ); | ||
|
|
||
| // 'C' constraint (an immediate 32-bit unsigned integer or 'A' constraint) | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "C" (imm) : ); | ||
|
|
||
| // 'DA' constraint (an immediate 64-bit constant that can be split into two 'A' constants) | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "DA" (imm) : ); | ||
|
|
||
| // 'DB' constraint (an immediate 64-bit constant that can be split into two 'B' constants) | ||
| __asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "DB" (imm) : ); | ||
|
|
||
| } | ||
|
|
||
| __kernel void | ||
| test_float(const __global float *a, const __global float *b, __global float *c, unsigned i) | ||
| { | ||
| float ai = a[i]; | ||
| float bi = b[i]; | ||
| float ci; | ||
|
|
||
| __asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); | ||
| __asm("v_add_f32_e32 v1, v2, v3" : ""(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '' in asm}} | ||
| __asm("v_add_f32_e32 v1, v2, v3" : "="(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=' in asm}} | ||
| __asm("v_add_f32_e32 v1, v2, v3" : "={a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={a}' in asm}} | ||
| __asm("v_add_f32_e32 v1, v2, v3" : "={"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={' in asm}} | ||
| __asm("v_add_f32_e32 v1, v2, v3" : "={}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={}' in asm}} | ||
| __asm("v_add_f32_e32 v1, v2, v3" : "={v"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v' in asm}} | ||
| __asm("v_add_f32_e32 v1, v2, v3" : "={v1a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1a}' in asm}} | ||
| __asm("v_add_f32_e32 v1, v2, v3" : "={va}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={va}' in asm}} | ||
| __asm("v_add_f32_e32 v1, v2, v3" : "={v1}a"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1}a' in asm}} | ||
| __asm("v_add_f32_e32 v1, v2, v3" : "={v1"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1' in asm}} | ||
| __asm("v_add_f32_e32 v1, v2, v3" : "=v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=v1}' in asm}} | ||
|
|
||
| __asm("v_add_f32_e32 v1, v2, v3" : "={v[1]}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); | ||
| __asm("v_add_f32_e32 v1, v2, v3" : "={v[1}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[1}' in asm}} | ||
| __asm("v_add_f32_e32 v1, v2, v3" : "={v[1]"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[1]' in asm}} | ||
| __asm("v_add_f32_e32 v1, v2, v3" : "={v[a]}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[a]}' in asm}} | ||
|
|
||
| __asm("v_add_f32_e32 v1, v2, v3" : "=v"(ci) : "v"(ai), "v"(bi) : ); | ||
| __asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// expected-error {{invalid output constraint '=v1' in asm}} | ||
|
|
||
| __asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{a}"(ai), "{v3}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}} | ||
| __asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{a}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}} | ||
| c[i] = ci; | ||
| } | ||
|
|
||
| __kernel void | ||
| test_double(const __global double *a, const __global double *b, __global double *c, unsigned i) | ||
| { | ||
| double ai = a[i]; | ||
| double bi = b[i]; | ||
| double ci; | ||
|
|
||
| __asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); | ||
| __asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v{[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '=v{[1:2]}' in asm}} | ||
| __asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]a}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2]a}' in asm}} | ||
| __asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]}a"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2]}a' in asm}} | ||
| __asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:' in asm}} | ||
| __asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:]}' in asm}} | ||
| __asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[:2]}' in asm}} | ||
| __asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2]' in asm}} | ||
| __asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2}' in asm}} | ||
| __asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[2:1]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[2:1]}' in asm}} | ||
|
|
||
| __asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v[1:2]"(ci) : "v[3:4]"(ai), "v[5:6]"(bi) : ); //expected-error {{invalid output constraint '=v[1:2]' in asm}} | ||
|
|
||
| c[i] = ci; | ||
| } | ||
|
|
||
| void test_long(int arg0) { | ||
| long v15_16; | ||
| __asm volatile("v_lshlrev_b64 v[15:16], 0, %0" : "={v[15:16]}"(v15_16) : "v"(arg0)); | ||
| } |
| @@ -1,4 +1,5 @@ | ||
| // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -fsyntax-only -verify %s | ||
| // RUN: %clang_cc1 -triple spirv64-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -fsyntax-only -verify %s | ||
|
|
||
| // expected-no-diagnostics | ||
| typedef __float128 f128_t; |
| @@ -0,0 +1,86 @@ | ||
| // RUN: %clang_cc1 -x hip -std=c++11 -triple spirv64-amd-amdhsa -fcuda-is-device -verify -fsyntax-only %s | ||
|
|
||
| #include "Inputs/cuda.h" | ||
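| // Each __hip_atomic_* builtin takes a memory-order argument and a | ||
| // __HIP_MEMORY_SCOPE_* argument; wrong argument counts, non-pointer addresses, | ||
| // invalid scopes, and unsupported memory orders are diagnosed as checked below. | ||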
|
|
||
| __device__ int test_hip_atomic_load(int *pi32, unsigned int *pu32, long long *pll, unsigned long long *pull, float *fp, double *dbl) { | ||
| int val = __hip_atomic_load(0); // expected-error {{too few arguments to function call, expected 3, have 1}} | ||
| val = __hip_atomic_load(0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 3, have 4}} | ||
| val = __hip_atomic_load(0, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}} | ||
| val = __hip_atomic_load(pi32, 0, 0); // expected-error {{synchronization scope argument to atomic operation is invalid}} | ||
| val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); | ||
| val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); | ||
| val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); | ||
| val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); | ||
| val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, 6); // expected-error {{synchronization scope argument to atomic operation is invalid}} | ||
| val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| val = __hip_atomic_load(pi32, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| val = __hip_atomic_load(pi32, __ATOMIC_CONSUME, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| val = __hip_atomic_load(pi32, __ATOMIC_ACQUIRE, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| val = __hip_atomic_load(pi32, __ATOMIC_ACQ_REL, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning{{memory order argument to atomic operation is invalid}} | ||
| val = __hip_atomic_load(pu32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| val = __hip_atomic_load(pll, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| val = __hip_atomic_load(pull, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| val = __hip_atomic_load(fp, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| val = __hip_atomic_load(dbl, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| return val; | ||
| } | ||
|
|
||
| __device__ int test_hip_atomic_store(int *pi32, unsigned int *pu32, long long *pll, unsigned long long *pull, float *fp, double *dbl, | ||
| int i32, unsigned int u32, long long i64, unsigned long long u64, float f32, double f64) { | ||
| __hip_atomic_store(0); // expected-error {{too few arguments to function call, expected 4, have 1}} | ||
| __hip_atomic_store(0, 0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 4, have 5}} | ||
| __hip_atomic_store(0, 0, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}} | ||
| __hip_atomic_store(pi32, 0, 0, 0); // expected-error {{synchronization scope argument to atomic operation is invalid}} | ||
| __hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); | ||
| __hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); | ||
| __hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); | ||
| __hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); | ||
| __hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, 6); // expected-error {{synchronization scope argument to atomic operation is invalid}} | ||
| __hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(pi32, 0, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(pi32, 0, __ATOMIC_CONSUME, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning{{memory order argument to atomic operation is invalid}} | ||
| __hip_atomic_store(pi32, 0, __ATOMIC_ACQUIRE, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning{{memory order argument to atomic operation is invalid}} | ||
| __hip_atomic_store(pi32, 0, __ATOMIC_ACQ_REL, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning{{memory order argument to atomic operation is invalid}} | ||
| __hip_atomic_store(pi32, i32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(pi32, i32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(pu32, u32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(pll, i64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(pull, u64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(fp, f32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(dbl, f64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(pi32, u32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(pi32, i64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(pi32, u64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(pll, i32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(fp, i32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(fp, i64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(dbl, i64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| __hip_atomic_store(dbl, i32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| return 0; | ||
| } | ||
|
|
||
| __device__ bool test_hip_atomic_cmpxchg_weak(int *ptr, int val, int desired) { | ||
| bool flag = __hip_atomic_compare_exchange_weak(0); // expected-error {{too few arguments to function call, expected 6, have 1}} | ||
| flag = __hip_atomic_compare_exchange_weak(0, 0, 0, 0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 6, have 7}} | ||
| flag = __hip_atomic_compare_exchange_weak(0, 0, 0, 0, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}} | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, 0, 0, 0, 0, 0); // expected-error {{synchronization scope argument to atomic operation is invalid}}, expected-warning {{null passed to a callee that requires a non-null argument}} | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, 0, 0, 0, 0, __HIP_MEMORY_SCOPE_SYSTEM); // expected-warning {{null passed to a callee that requires a non-null argument}} | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM); | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_CONSUME, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT); | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP); | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT); | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_CONSUME, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_ACQUIRE, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_ACQ_REL, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning {{failure memory order argument to atomic operation is invalid}} | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_CONSUME, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD); | ||
| return flag; | ||
| } |
| @@ -0,0 +1,108 @@ | ||
| ! Test lowering of SIZE/SIZEOF inquiry intrinsics with assumed-rank | ||
| ! arguments. | ||
| ! RUN: bbc -emit-hlfir -o - %s -allow-assumed-rank | FileCheck %s | ||
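| ! Assumed-rank dummies are lowered to descriptors (!fir.box<!fir.array<*:f32>>), and | ||
| ! SIZE is computed by calling the Fortran runtime (_FortranASize / _FortranASizeDim) | ||
| ! on the descriptor, as the CHECK lines below show. | ||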
|
|
||
|
|
||
| subroutine test_size_1(x) | ||
| real :: x(..) | ||
| call takes_integer(size(x)) | ||
| end subroutine | ||
|
|
||
| subroutine test_size_2(x) | ||
| real :: x(..) | ||
| call takes_integer(size(x, 2)) | ||
| end subroutine | ||
|
|
||
| subroutine test_size_3(x, d) | ||
| real :: x(..) | ||
| integer, optional :: d | ||
| call takes_integer(size(x, d)) | ||
| end subroutine | ||
|
|
||
| subroutine test_size_4(x) | ||
| real, allocatable :: x(..) | ||
| call takes_integer(size(x)) | ||
| end subroutine | ||
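| ! In test_size_2 and test_size_3 the DIM argument is lowered through an address that | ||
| ! is tested for null at run time, branching between _FortranASize (total size) and | ||
| ! _FortranASizeDim (extent along one dimension), as the fir.if regions below show. | ||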
|
|
||
|
|
||
| ! CHECK-LABEL: func.func @_QPtest_size_1( | ||
| ! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<*:f32>> {fir.bindc_name = "x"}) { | ||
| ! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope | ||
| ! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {uniq_name = "_QFtest_size_1Ex"} : (!fir.box<!fir.array<*:f32>>, !fir.dscope) -> (!fir.box<!fir.array<*:f32>>, !fir.box<!fir.array<*:f32>>) | ||
| ! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_2]]#1 : (!fir.box<!fir.array<*:f32>>) -> !fir.box<none> | ||
| ! CHECK: %[[VAL_7:.*]] = fir.call @_FortranASize(%[[VAL_5]] | ||
| ! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i64) -> i32 | ||
| ! CHECK: %[[VAL_9:.*]]:3 = hlfir.associate %[[VAL_8]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1) | ||
| ! CHECK: fir.call @_QPtakes_integer(%[[VAL_9]]#1) fastmath<contract> : (!fir.ref<i32>) -> () | ||
| ! CHECK: hlfir.end_associate %[[VAL_9]]#1, %[[VAL_9]]#2 : !fir.ref<i32>, i1 | ||
| ! CHECK: return | ||
| ! CHECK: } | ||
|
|
||
| ! CHECK-LABEL: func.func @_QPtest_size_2( | ||
| ! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<*:f32>> {fir.bindc_name = "x"}) { | ||
| ! CHECK: %[[VAL_1:.*]] = fir.alloca i32 | ||
| ! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope | ||
| ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_2]] {uniq_name = "_QFtest_size_2Ex"} : (!fir.box<!fir.array<*:f32>>, !fir.dscope) -> (!fir.box<!fir.array<*:f32>>, !fir.box<!fir.array<*:f32>>) | ||
| ! CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32 | ||
| ! CHECK: fir.store %[[VAL_4]] to %[[VAL_1]] : !fir.ref<i32> | ||
| ! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<i32>) -> i64 | ||
| ! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i64 | ||
| ! CHECK: %[[VAL_7:.*]] = arith.cmpi eq, %[[VAL_5]], %[[VAL_6]] : i64 | ||
| ! CHECK: %[[VAL_8:.*]] = fir.if %[[VAL_7]] -> (i32) { | ||
| ! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.box<!fir.array<*:f32>>) -> !fir.box<none> | ||
| ! CHECK: %[[VAL_13:.*]] = fir.call @_FortranASize(%[[VAL_11]] | ||
| ! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i64) -> i32 | ||
| ! CHECK: fir.result %[[VAL_14]] : i32 | ||
| ! CHECK: } else { | ||
| ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_1]] : !fir.ref<i32> | ||
| ! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.box<!fir.array<*:f32>>) -> !fir.box<none> | ||
| ! CHECK: %[[VAL_20:.*]] = fir.call @_FortranASizeDim(%[[VAL_18]] | ||
| ! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i64) -> i32 | ||
| ! CHECK: fir.result %[[VAL_21]] : i32 | ||
| ! CHECK: } | ||
| ! CHECK: %[[VAL_22:.*]]:3 = hlfir.associate %[[VAL_8]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1) | ||
| ! CHECK: fir.call @_QPtakes_integer(%[[VAL_22]]#1) fastmath<contract> : (!fir.ref<i32>) -> () | ||
| ! CHECK: hlfir.end_associate %[[VAL_22]]#1, %[[VAL_22]]#2 : !fir.ref<i32>, i1 | ||
| ! CHECK: return | ||
| ! CHECK: } | ||
|
|
||
| ! CHECK-LABEL: func.func @_QPtest_size_3( | ||
| ! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<*:f32>> {fir.bindc_name = "x"}, | ||
| ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "d", fir.optional}) { | ||
| ! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope | ||
| ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_2]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFtest_size_3Ed"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>) | ||
| ! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_2]] {uniq_name = "_QFtest_size_3Ex"} : (!fir.box<!fir.array<*:f32>>, !fir.dscope) -> (!fir.box<!fir.array<*:f32>>, !fir.box<!fir.array<*:f32>>) | ||
| ! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.ref<i32>) -> i64 | ||
| ! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i64 | ||
| ! CHECK: %[[VAL_7:.*]] = arith.cmpi eq, %[[VAL_5]], %[[VAL_6]] : i64 | ||
| ! CHECK: %[[VAL_8:.*]] = fir.if %[[VAL_7]] -> (i32) { | ||
| ! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_4]]#1 : (!fir.box<!fir.array<*:f32>>) -> !fir.box<none> | ||
| ! CHECK: %[[VAL_13:.*]] = fir.call @_FortranASize(%[[VAL_11]], | ||
| ! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i64) -> i32 | ||
| ! CHECK: fir.result %[[VAL_14]] : i32 | ||
| ! CHECK: } else { | ||
| ! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_3]]#1 : !fir.ref<i32> | ||
| ! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_4]]#1 : (!fir.box<!fir.array<*:f32>>) -> !fir.box<none> | ||
| ! CHECK: %[[VAL_20:.*]] = fir.call @_FortranASizeDim(%[[VAL_18]] | ||
| ! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i64) -> i32 | ||
| ! CHECK: fir.result %[[VAL_21]] : i32 | ||
| ! CHECK: } | ||
| ! CHECK: %[[VAL_22:.*]]:3 = hlfir.associate %[[VAL_8]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1) | ||
| ! CHECK: fir.call @_QPtakes_integer(%[[VAL_22]]#1) fastmath<contract> : (!fir.ref<i32>) -> () | ||
| ! CHECK: hlfir.end_associate %[[VAL_22]]#1, %[[VAL_22]]#2 : !fir.ref<i32>, i1 | ||
| ! CHECK: return | ||
| ! CHECK: } | ||
|
|
||
| ! CHECK-LABEL: func.func @_QPtest_size_4( | ||
| ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<*:f32>>>> {fir.bindc_name = "x"}) { | ||
| ! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope | ||
| ! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_size_4Ex"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<*:f32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<*:f32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<*:f32>>>>) | ||
| ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<*:f32>>>> | ||
| ! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_3]] : (!fir.box<!fir.heap<!fir.array<*:f32>>>) -> !fir.box<none> | ||
| ! CHECK: %[[VAL_8:.*]] = fir.call @_FortranASize(%[[VAL_6]] | ||
| ! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i64) -> i32 | ||
| ! CHECK: %[[VAL_10:.*]]:3 = hlfir.associate %[[VAL_9]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1) | ||
| ! CHECK: fir.call @_QPtakes_integer(%[[VAL_10]]#1) fastmath<contract> : (!fir.ref<i32>) -> () | ||
| ! CHECK: hlfir.end_associate %[[VAL_10]]#1, %[[VAL_10]]#2 : !fir.ref<i32>, i1 | ||
| ! CHECK: return | ||
| ! CHECK: } |
| @@ -0,0 +1,71 @@ | ||
| ! Tests delayed privatization of allocatables for `target ... private(..)`. | ||
|
|
||
| ! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization-staging \ | ||
| ! RUN: -o - %s 2>&1 | FileCheck %s | ||
| ! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization-staging -o - %s 2>&1 \ | ||
| ! RUN: | FileCheck %s | ||
|
|
||
| subroutine target_allocatable | ||
| implicit none | ||
| integer, allocatable :: alloc_var | ||
|
|
||
| !$omp target private(alloc_var) | ||
| alloc_var = 10 | ||
| !$omp end target | ||
| end subroutine target_allocatable | ||
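| ! The checks below cover the generated omp.private op: the alloc region creates a | ||
| ! private descriptor and, if the original variable is currently allocated, | ||
| ! heap-allocates private storage; the dealloc region frees that storage when its | ||
| ! address is non-null. | ||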
|
|
||
| ! CHECK-LABEL: omp.private {type = private} | ||
| ! CHECK-SAME: @[[VAR_PRIVATIZER_SYM:.*]] : | ||
| ! CHECK-SAME: [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc { | ||
| ! CHECK: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]): | ||
| ! CHECK: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>> {bindc_name = "alloc_var", {{.*}}} | ||
|
|
||
| ! CHECK-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : !fir.ref<!fir.box<!fir.heap<i32>>> | ||
| ! CHECK-NEXT: %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32> | ||
| ! CHECK-NEXT: %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]] : (!fir.heap<i32>) -> i64 | ||
| ! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64 | ||
| ! CHECK-NEXT: %[[ALLOC_COND:.*]] = arith.cmpi ne, %[[PRIV_ARG_ADDR]], %[[C0]] : i64 | ||
|
|
||
| ! CHECK-NEXT: fir.if %[[ALLOC_COND]] { | ||
| ! CHECK: %[[PRIV_ALLOCMEM:.*]] = fir.allocmem i32 {fir.must_be_heap = true, {{.*}}} | ||
| ! CHECK-NEXT: %[[PRIV_ALLOCMEM_BOX:.*]] = fir.embox %[[PRIV_ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>> | ||
| ! CHECK-NEXT: fir.store %[[PRIV_ALLOCMEM_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>> | ||
| ! CHECK-NEXT: } else { | ||
| ! CHECK-NEXT: %[[ZERO_BITS:.*]] = fir.zero_bits !fir.heap<i32> | ||
| ! CHECK-NEXT: %[[ZERO_BOX:.*]] = fir.embox %[[ZERO_BITS]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>> | ||
| ! CHECK-NEXT: fir.store %[[ZERO_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>> | ||
| ! CHECK-NEXT: } | ||
|
|
||
| ! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]] | ||
| ! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]]) | ||
|
|
||
| ! CHECK-NEXT: } dealloc { | ||
| ! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]): | ||
|
|
||
| ! CHECK-NEXT: %[[PRIV_VAL:.*]] = fir.load %[[PRIV_ARG]] | ||
| ! CHECK-NEXT: %[[PRIV_ADDR:.*]] = fir.box_addr %[[PRIV_VAL]] | ||
| ! CHECK-NEXT: %[[PRIV_ADDR_I64:.*]] = fir.convert %[[PRIV_ADDR]] | ||
| ! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64 | ||
| ! CHECK-NEXT: %[[PRIV_NULL_COND:.*]] = arith.cmpi ne, %[[PRIV_ADDR_I64]], %[[C0]] : i64 | ||
|
|
||
| ! CHECK-NEXT: fir.if %[[PRIV_NULL_COND]] { | ||
| ! CHECK: %[[PRIV_VAL_2:.*]] = fir.load %[[PRIV_ARG]] | ||
| ! CHECK-NEXT: %[[PRIV_ADDR_2:.*]] = fir.box_addr %[[PRIV_VAL_2]] | ||
| ! CHECK-NEXT: fir.freemem %[[PRIV_ADDR_2]] | ||
| ! CHECK-NEXT: %[[ZEROS:.*]] = fir.zero_bits | ||
| ! CHECK-NEXT: %[[ZEROS_BOX:.*]] = fir.embox %[[ZEROS]] | ||
| ! CHECK-NEXT: fir.store %[[ZEROS_BOX]] to %[[PRIV_ARG]] | ||
| ! CHECK-NEXT: } | ||
|
|
||
| ! CHECK-NEXT: omp.yield | ||
| ! CHECK-NEXT: } | ||
|
|
||
|
|
||
| ! CHECK-LABEL: func.func @_QPtarget_allocatable() { | ||
|
|
||
| ! CHECK: %[[VAR_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>> | ||
| ! CHECK-SAME: {bindc_name = "alloc_var", {{.*}}} | ||
| ! CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_ALLOC]] | ||
|
|
||
| ! CHECK: omp.target private( | ||
| ! CHECK-SAME: @[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %{{.*}} : [[TYPE]]) { |