294 changes: 294 additions & 0 deletions clang/test/CodeGenCUDA/builtins-spirv-amdgcn.cu

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip \
// RUN: -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -emit-llvm %s \
// RUN: -o - | FileCheck %s

#define __device__ __attribute__((device))
typedef __attribute__((address_space(3))) float *LP;

// CHECK-LABEL: define spir_func void @_Z22test_ds_atomic_add_f32Pff(
// CHECK-SAME: ptr addrspace(4) noundef [[ADDR:%.*]], float noundef [[VAL:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca float, align 4
// CHECK-NEXT: [[RTN:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT: [[ADDR_ADDR_ASCAST:%.*]] = addrspacecast ptr [[ADDR_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: [[VAL_ADDR_ASCAST:%.*]] = addrspacecast ptr [[VAL_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: [[RTN_ASCAST:%.*]] = addrspacecast ptr [[RTN]] to ptr addrspace(4)
// CHECK-NEXT: store ptr addrspace(4) [[ADDR]], ptr addrspace(4) [[ADDR_ADDR_ASCAST]], align 8
// CHECK-NEXT: store float [[VAL]], ptr addrspace(4) [[VAL_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[ADDR_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[TMP0]] to ptr addrspace(3)
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(4) [[VAL_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = call contract addrspace(4) float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) [[TMP1]], float [[TMP2]], i32 0, i32 0, i1 false)
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[RTN_ASCAST]], align 8
// CHECK-NEXT: store float [[TMP3]], ptr addrspace(4) [[TMP4]], align 4
// CHECK-NEXT: ret void
//
// Device function under test: casts `addr` to an address_space(3) float
// pointer (the LP typedef) and passes it to __builtin_amdgcn_ds_faddf. The
// generated assertions above expect an addrspacecast to addrspace(3) followed
// by a call to @llvm.amdgcn.ds.fadd.f32.
// NOTE(review): `rtn` is dereferenced without ever being assigned. The test
// only emits LLVM IR and the generated assertions encode the load of the
// uninitialized pointer, so changing this requires regenerating them.
__device__ void test_ds_atomic_add_f32(float *addr, float val) {
float *rtn;
*rtn = __builtin_amdgcn_ds_faddf((LP)addr, val, 0, 0, 0);
}
4 changes: 4 additions & 0 deletions clang/test/CodeGenCUDA/long-double.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
// RUN: -aux-triple x86_64-unknown-gnu-linux -fcuda-is-device \
// RUN: -emit-llvm -o - -x hip %s 2>&1 | FileCheck %s

// RUN: %clang_cc1 -triple spirv64-amd-amdhsa \
// RUN: -aux-triple x86_64-unknown-gnu-linux -fcuda-is-device \
// RUN: -emit-llvm -o - -x hip %s 2>&1 | FileCheck %s

// RUN: %clang_cc1 -triple nvptx \
// RUN: -aux-triple x86_64-unknown-gnu-linux -fcuda-is-device \
// RUN: -emit-llvm -o - %s 2>&1 | FileCheck %s
Expand Down
129 changes: 129 additions & 0 deletions clang/test/CodeGenCUDA/spirv-amdgcn-bf16.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// REQUIRES: amdgpu-registered-target
// REQUIRES: x86-registered-target

// RUN: %clang_cc1 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "spirv64-amd-amdhsa" \
// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -emit-llvm -o - %s | FileCheck %s

#include "Inputs/cuda.h"

// CHECK-LABEL: @_Z8test_argPDF16bDF16b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2
// CHECK-NEXT: [[BF16:%.*]] = alloca bfloat, align 2
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr [[IN_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: [[BF16_ASCAST:%.*]] = addrspacecast ptr [[BF16]] to ptr addrspace(4)
// CHECK-NEXT: store ptr addrspace(4) [[OUT:%.*]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: store bfloat [[TMP0]], ptr addrspace(4) [[BF16_ASCAST]], align 2
// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr addrspace(4) [[BF16_ASCAST]], align 2
// CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(4) [[TMP2]], align 2
// CHECK-NEXT: ret void
//
// Copies a by-value __bf16 argument into a local and stores that local through
// `out`. The assertions above expect plain bfloat loads/stores (align 2)
// through addrspace(4) pointers.
__device__ void test_arg(__bf16 *out, __bf16 in) {
__bf16 bf16 = in;
*out = bf16;
}

// CHECK-LABEL: @_Z9test_loadPDF16bS_(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr addrspace(4), align 8
// CHECK-NEXT: [[BF16:%.*]] = alloca bfloat, align 2
// CHECK-NEXT: [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr [[OUT_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr [[IN_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: [[BF16_ASCAST:%.*]] = addrspacecast ptr [[BF16]] to ptr addrspace(4)
// CHECK-NEXT: store ptr addrspace(4) [[OUT:%.*]], ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store ptr addrspace(4) [[IN:%.*]], ptr addrspace(4) [[IN_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[IN_ADDR_ASCAST]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr addrspace(4) [[TMP0]], align 2
// CHECK-NEXT: store bfloat [[TMP1]], ptr addrspace(4) [[BF16_ASCAST]], align 2
// CHECK-NEXT: [[TMP2:%.*]] = load bfloat, ptr addrspace(4) [[BF16_ASCAST]], align 2
// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(4) [[OUT_ADDR_ASCAST]], align 8
// CHECK-NEXT: store bfloat [[TMP2]], ptr addrspace(4) [[TMP3]], align 2
// CHECK-NEXT: ret void
//
// Loads a __bf16 through the pointer `in`, keeps it in a local, then stores it
// through `out`. Covers bfloat loads from a pointer argument.
__device__ void test_load(__bf16 *out, __bf16 *in) {
__bf16 bf16 = *in;
*out = bf16;
}

// CHECK-LABEL: @_Z8test_retDF16b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4)
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr [[IN_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: ret bfloat [[TMP0]]
//
// Returns its by-value __bf16 argument unchanged; the assertions above expect
// a direct `ret bfloat`.
__device__ __bf16 test_ret( __bf16 in) {
return in;
}

// CHECK-LABEL: @_Z9test_callDF16b(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2
// CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4)
// CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr [[IN_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: store bfloat [[IN:%.*]], ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr addrspace(4) [[IN_ADDR_ASCAST]], align 2
// CHECK-NEXT: [[CALL:%.*]] = call contract spir_func noundef addrspace(4) bfloat @_Z8test_retDF16b(bfloat noundef [[TMP0]]) #[[ATTR1:[0-9]+]]
// CHECK-NEXT: ret bfloat [[CALL]]
//
// Forwards its argument to test_ret and returns the result; the assertions
// above expect a spir_func call that takes and returns bfloat.
__device__ __bf16 test_call( __bf16 in) {
return test_ret(in);
}


// CHECK-LABEL: @_Z15test_vec_assignv(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[VEC2_A:%.*]] = alloca <2 x bfloat>, align 4
// CHECK-NEXT: [[VEC2_B:%.*]] = alloca <2 x bfloat>, align 4
// CHECK-NEXT: [[VEC4_A:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-NEXT: [[VEC4_B:%.*]] = alloca <4 x bfloat>, align 8
// CHECK-NEXT: [[VEC8_A:%.*]] = alloca <8 x bfloat>, align 16
// CHECK-NEXT: [[VEC8_B:%.*]] = alloca <8 x bfloat>, align 16
// CHECK-NEXT: [[VEC16_A:%.*]] = alloca <16 x bfloat>, align 32
// CHECK-NEXT: [[VEC16_B:%.*]] = alloca <16 x bfloat>, align 32
// CHECK-NEXT: [[VEC2_A_ASCAST:%.*]] = addrspacecast ptr [[VEC2_A]] to ptr addrspace(4)
// CHECK-NEXT: [[VEC2_B_ASCAST:%.*]] = addrspacecast ptr [[VEC2_B]] to ptr addrspace(4)
// CHECK-NEXT: [[VEC4_A_ASCAST:%.*]] = addrspacecast ptr [[VEC4_A]] to ptr addrspace(4)
// CHECK-NEXT: [[VEC4_B_ASCAST:%.*]] = addrspacecast ptr [[VEC4_B]] to ptr addrspace(4)
// CHECK-NEXT: [[VEC8_A_ASCAST:%.*]] = addrspacecast ptr [[VEC8_A]] to ptr addrspace(4)
// CHECK-NEXT: [[VEC8_B_ASCAST:%.*]] = addrspacecast ptr [[VEC8_B]] to ptr addrspace(4)
// CHECK-NEXT: [[VEC16_A_ASCAST:%.*]] = addrspacecast ptr [[VEC16_A]] to ptr addrspace(4)
// CHECK-NEXT: [[VEC16_B_ASCAST:%.*]] = addrspacecast ptr [[VEC16_B]] to ptr addrspace(4)
// CHECK-NEXT: [[TMP0:%.*]] = load <2 x bfloat>, ptr addrspace(4) [[VEC2_B_ASCAST]], align 4
// CHECK-NEXT: store <2 x bfloat> [[TMP0]], ptr addrspace(4) [[VEC2_A_ASCAST]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load <4 x bfloat>, ptr addrspace(4) [[VEC4_B_ASCAST]], align 8
// CHECK-NEXT: store <4 x bfloat> [[TMP1]], ptr addrspace(4) [[VEC4_A_ASCAST]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load <8 x bfloat>, ptr addrspace(4) [[VEC8_B_ASCAST]], align 16
// CHECK-NEXT: store <8 x bfloat> [[TMP2]], ptr addrspace(4) [[VEC8_A_ASCAST]], align 16
// CHECK-NEXT: [[TMP3:%.*]] = load <16 x bfloat>, ptr addrspace(4) [[VEC16_B_ASCAST]], align 32
// CHECK-NEXT: store <16 x bfloat> [[TMP3]], ptr addrspace(4) [[VEC16_A_ASCAST]], align 32
// CHECK-NEXT: ret void
//
// Assigns one local __bf16 ext_vector to another for widths 2, 4, 8 and 16.
// The vectors are never initialized; the assertions above only look at the
// emitted allocas and vector loads/stores (alignments 4, 8, 16 and 32).
__device__ void test_vec_assign() {
// 2 x bfloat
typedef __attribute__((ext_vector_type(2))) __bf16 bf16_x2;
bf16_x2 vec2_a, vec2_b;
vec2_a = vec2_b;

// 4 x bfloat
typedef __attribute__((ext_vector_type(4))) __bf16 bf16_x4;
bf16_x4 vec4_a, vec4_b;
vec4_a = vec4_b;

// 8 x bfloat
typedef __attribute__((ext_vector_type(8))) __bf16 bf16_x8;
bf16_x8 vec8_a, vec8_b;
vec8_a = vec8_b;

// 16 x bfloat
typedef __attribute__((ext_vector_type(16))) __bf16 bf16_x16;
bf16_x16 vec16_a, vec16_b;
vec16_a = vec16_b;
}
8 changes: 8 additions & 0 deletions clang/test/CodeGenCXX/debug-info-struct-align.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,11 @@ struct MyType2 {
MyType2 mt2;

static_assert(alignof(MyType2) == 1, "alignof MyType2 is wrong");

// Pack to 1-byte alignment from here on (no matching reset is visible in this
// part of the file).
#pragma pack(1)
// Struct with a single int member; with the pack pragma above in effect, the
// static_assert below requires its alignment to be 1.
struct MyType3 {
int m;
};
// Global instance of the packed struct.
MyType3 mt3;

static_assert(alignof(MyType3) == 1, "alignof MyType3 is wrong");
38 changes: 38 additions & 0 deletions clang/test/CodeGenCXX/spirv-amdgcn-float16.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s

// CHECK-LABEL: define spir_func void @_Z1fv(
// CHECK-SAME: ) addrspace(4) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[X:%.*]] = alloca half, align 2
// CHECK-NEXT: [[Y:%.*]] = alloca half, align 2
// CHECK-NEXT: [[Z:%.*]] = alloca half, align 2
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[X]], align 2
// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[Y]], align 2
// CHECK-NEXT: [[ADD:%.*]] = fadd half [[TMP0]], [[TMP1]]
// CHECK-NEXT: store half [[ADD]], ptr [[Z]], align 2
// CHECK-NEXT: [[TMP2:%.*]] = load half, ptr [[X]], align 2
// CHECK-NEXT: [[TMP3:%.*]] = load half, ptr [[Y]], align 2
// CHECK-NEXT: [[SUB:%.*]] = fsub half [[TMP2]], [[TMP3]]
// CHECK-NEXT: store half [[SUB]], ptr [[Z]], align 2
// CHECK-NEXT: [[TMP4:%.*]] = load half, ptr [[X]], align 2
// CHECK-NEXT: [[TMP5:%.*]] = load half, ptr [[Y]], align 2
// CHECK-NEXT: [[MUL:%.*]] = fmul half [[TMP4]], [[TMP5]]
// CHECK-NEXT: store half [[MUL]], ptr [[Z]], align 2
// CHECK-NEXT: [[TMP6:%.*]] = load half, ptr [[X]], align 2
// CHECK-NEXT: [[TMP7:%.*]] = load half, ptr [[Y]], align 2
// CHECK-NEXT: [[DIV:%.*]] = fdiv half [[TMP6]], [[TMP7]]
// CHECK-NEXT: store half [[DIV]], ptr [[Z]], align 2
// CHECK-NEXT: ret void
//
// Applies +, -, * and / to _Float16 locals. The assertions above expect each
// operation to be emitted directly on `half` (fadd/fsub/fmul/fdiv).
// The operands are left uninitialized; only the emitted IR is inspected.
void f() {
_Float16 x, y, z;

// expected: fadd half
z = x + y;

// expected: fsub half
z = x - y;

// expected: fmul half
z = x * y;

// expected: fdiv half
z = x / y;
}
2 changes: 2 additions & 0 deletions clang/test/CodeGenHIP/hipspv-addr-spaces.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// RUN: %clang_cc1 -triple spirv64 -x hip -emit-llvm -fcuda-is-device \
// RUN: -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip -emit-llvm -fcuda-is-device \
// RUN: -o - %s | FileCheck %s

#define __device__ __attribute__((device))
#define __shared__ __attribute__((shared))
Expand Down
27 changes: 27 additions & 0 deletions clang/test/CodeGenHIP/spirv-amdgcn-ballot.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -aux-triple x86_64-pc-windows-msvc -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s

// Unlike OpenCL, HIP depends on the C++ interpretation of "unsigned long", which
// is 64 bits long on Linux and 32 bits long on Windows. The return type of the
// ballot intrinsic needs to be a 64 bit integer on both platforms. This test
// cross-compiles to Windows to confirm that the return type is indeed 64 bits
// on Windows.

#define __device__ __attribute__((device))

// CHECK-LABEL: define spir_func noundef i64 @_Z3fooi(
// CHECK-SAME: i32 noundef [[P:%.*]]) addrspace(4) #[[ATTR0:[0-9]+]] {
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i64, align 8
// CHECK-NEXT: [[P_ADDR:%.*]] = alloca i32, align 4
// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr [[RETVAL]] to ptr addrspace(4)
// CHECK-NEXT: [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr [[P_ADDR]] to ptr addrspace(4)
// CHECK-NEXT: store i32 [[P]], ptr addrspace(4) [[P_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) [[P_ADDR_ASCAST]], align 4
// CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
// CHECK-NEXT: [[TMP1:%.*]] = call addrspace(4) i64 @llvm.amdgcn.ballot.i64(i1 [[TOBOOL]])
// CHECK-NEXT: ret i64 [[TMP1]]
//
// Returns the result of the wave64 ballot builtin for predicate `p`.
// The assertions above expect `p` to be compared against zero and passed to
// @llvm.amdgcn.ballot.i64, with the function itself returning i64.
__device__ unsigned long long foo(int p) {
return __builtin_amdgcn_ballot_w64(p);
}
46 changes: 46 additions & 0 deletions clang/test/CodeGenHIP/spirv-amdgcn-dpp-const-fold.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip -fcuda-is-device -emit-llvm %s \
// RUN: -o - | FileCheck %s

// constexpr helper evaluating to 16. It is passed as the third argument of
// __builtin_amdgcn_update_dpp further down, where the expected IR shows the
// folded immediate `i32 16`.
constexpr static int OpCtrl()
{
return 15 + 1;
}

// constexpr helper evaluating to 4. It is passed as the fourth argument of
// __builtin_amdgcn_update_dpp further down (expected folded immediate `i32 4`).
constexpr static int RowMask()
{
return 3 + 1;
}

// constexpr helper evaluating to 3. It is passed as the fifth argument of
// __builtin_amdgcn_update_dpp further down (expected folded immediate `i32 3`).
constexpr static int BankMask()
{
return 2 + 1;
}

// constexpr helper evaluating to false (`true & false`). It is passed as the
// last argument of __builtin_amdgcn_update_dpp further down (expected folded
// immediate `i1 false`).
// NOTE(review): the name looks like a typo for "BoundCtrl"; renaming it would
// also require updating its caller in this file.
constexpr static bool BountCtrl()
{
return true & false;
}

// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 16, i32 0, i32 0, i1 false)
// Kernel that supplies the third builtin argument via the constexpr call
// OpCtrl(); the expectation above requires it to appear as the literal 16.
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_2(int* out, int a, int b)
{
*out = __builtin_amdgcn_update_dpp(a, b, OpCtrl(), 0, 0, false);
}

// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 4, i32 0, i1 false)
// Kernel that supplies the fourth builtin argument via the constexpr call
// RowMask(); the expectation above requires it to appear as the literal 4.
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_3(int* out, int a, int b)
{
*out = __builtin_amdgcn_update_dpp(a, b, 0, RowMask(), 0, false);
}

// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 3, i1 false)
// Kernel that supplies the fifth builtin argument via the constexpr call
// BankMask(); the expectation above requires it to appear as the literal 3.
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_4(int* out, int a, int b)
{
*out = __builtin_amdgcn_update_dpp(a, b, 0, 0, BankMask(), false);
}

// CHECK: call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %1, i32 %2, i32 0, i32 0, i32 0, i1 false)
// Kernel that supplies the last (boolean) builtin argument via the constexpr
// call BountCtrl(); the expectation above requires it to appear as `i1 false`.
__attribute__((global)) void test_update_dpp_const_fold_imm_operand_5(int* out, int a, int b)
{
*out = __builtin_amdgcn_update_dpp(a, b, 0, 0, 0, BountCtrl());
}
15 changes: 15 additions & 0 deletions clang/test/CodeGenHIP/spirv-amdgcn-half.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -x hip -emit-llvm -fcuda-is-device -o - %s | FileCheck %s

#define __device__ __attribute__((device))

// CHECK-LABEL: @_Z2d0DF16_
// CHECK: fpext
// Implicit _Float16 -> float conversion on return; the expectation above looks
// for an fpext in the emitted IR.
__device__ float d0(_Float16 x) {
return x;
}

// CHECK-LABEL: @_Z2d1f
// CHECK: fptrunc
// Implicit float -> _Float16 conversion on return; the expectation above looks
// for an fptrunc in the emitted IR.
__device__ _Float16 d1(float x) {
return x;
}
7 changes: 4 additions & 3 deletions clang/test/CodeGenOpenCL/amdgcn-flat-scratch-name.cl
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s

// CHECK-LABEL: @use_flat_scratch_name
// Kernel containing three volatile inline-asm statements, each naming one of
// flat_scratch, flat_scratch_lo and flat_scratch_hi in its clobber list. The
// expectations check that each name is carried into the IR clobber string.
// NOTE(review): every expectation below appears twice, once with and once
// without the `{{.*}}` wildcard after `call`; this looks like the old and new
// lines of a diff shown together - confirm only one of each pair is intended.
kernel void use_flat_scratch_name()
{
// CHECK: tail call void asm sideeffect "s_mov_b64 flat_scratch, 0", "~{flat_scratch}"()
// CHECK: tail call{{.*}} void asm sideeffect "s_mov_b64 flat_scratch, 0", "~{flat_scratch}"()
__asm__ volatile("s_mov_b64 flat_scratch, 0" : : : "flat_scratch");

// CHECK: tail call void asm sideeffect "s_mov_b32 flat_scratch_lo, 0", "~{flat_scratch_lo}"()
// CHECK: tail call{{.*}} void asm sideeffect "s_mov_b32 flat_scratch_lo, 0", "~{flat_scratch_lo}"()
__asm__ volatile("s_mov_b32 flat_scratch_lo, 0" : : : "flat_scratch_lo");

// CHECK: tail call void asm sideeffect "s_mov_b32 flat_scratch_hi, 0", "~{flat_scratch_hi}"()
// CHECK: tail call{{.*}} void asm sideeffect "s_mov_b32 flat_scratch_hi, 0", "~{flat_scratch_hi}"()
__asm__ volatile("s_mov_b32 flat_scratch_hi, 0" : : : "flat_scratch_hi");
}
13 changes: 7 additions & 6 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx10.cl
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,45 @@
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1011 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s

typedef unsigned int uint;
typedef unsigned long ulong;

// CHECK-LABEL: @test_permlane16(
// CHECK: call i32 @llvm.amdgcn.permlane16(i32 %a, i32 %b, i32 %c, i32 %d, i1 false, i1 false)
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlane16(i32 %a, i32 %b, i32 %c, i32 %d, i1 false, i1 false)
// Stores the permlane16 builtin result to *out; the two trailing 0 arguments
// are expected to appear as `i1 false` in the @llvm.amdgcn.permlane16 call.
void test_permlane16(global uint* out, uint a, uint b, uint c, uint d) {
*out = __builtin_amdgcn_permlane16(a, b, c, d, 0, 0);
}

// CHECK-LABEL: @test_permlanex16(
// CHECK: call i32 @llvm.amdgcn.permlanex16(i32 %a, i32 %b, i32 %c, i32 %d, i1 false, i1 false)
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlanex16(i32 %a, i32 %b, i32 %c, i32 %d, i1 false, i1 false)
// Stores the permlanex16 builtin result to *out; the two trailing 0 arguments
// are expected to appear as `i1 false` in the @llvm.amdgcn.permlanex16 call.
void test_permlanex16(global uint* out, uint a, uint b, uint c, uint d) {
*out = __builtin_amdgcn_permlanex16(a, b, c, d, 0, 0);
}

// CHECK-LABEL: @test_mov_dpp8(
// CHECK: call i32 @llvm.amdgcn.mov.dpp8.i32(i32 %a, i32 1)
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.mov.dpp8.i32(i32 %a, i32 1)
// Stores the mov_dpp8 builtin result (selector literal 1) to *out; expected to
// lower to @llvm.amdgcn.mov.dpp8.i32(i32 %a, i32 1).
void test_mov_dpp8(global uint* out, uint a) {
*out = __builtin_amdgcn_mov_dpp8(a, 1);
}

// CHECK-LABEL: @test_s_memtime
// CHECK: call i64 @llvm.amdgcn.s.memtime()
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memtime()
// Stores the 64-bit result of the s_memtime builtin to *out; expected to lower
// to a call of @llvm.amdgcn.s.memtime() returning i64.
void test_s_memtime(global ulong* out)
{
*out = __builtin_amdgcn_s_memtime();
}

// CHECK-LABEL: @test_groupstaticsize
// CHECK: call i32 @llvm.amdgcn.groupstaticsize()
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.groupstaticsize()
// Stores the groupstaticsize builtin result to *out; expected to lower to a
// call of @llvm.amdgcn.groupstaticsize() returning i32.
void test_groupstaticsize(global uint* out)
{
*out = __builtin_amdgcn_groupstaticsize();
}

// CHECK-LABEL: @test_ballot_wave32(
// CHECK: call i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.ballot.i32(i1 %{{.+}})
void test_ballot_wave32(global uint* out, int a, int b)
{
*out = __builtin_amdgcn_ballot_w32(a == b);
Expand Down
13 changes: 7 additions & 6 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,27 @@
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1150 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s

typedef unsigned int uint;
typedef unsigned long ulong;
typedef uint uint2 __attribute__((ext_vector_type(2)));
typedef uint uint4 __attribute__((ext_vector_type(4)));

// CHECK-LABEL: @test_s_sendmsg_rtn(
// CHECK: call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 0)
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 0)
// Stores the 32-bit s_sendmsg_rtn builtin result (message literal 0) to *out;
// expected to lower to @llvm.amdgcn.s.sendmsg.rtn.i32(i32 0).
void test_s_sendmsg_rtn(global uint* out) {
*out = __builtin_amdgcn_s_sendmsg_rtn(0);
}

// CHECK-LABEL: @test_s_sendmsg_rtnl(
// CHECK: call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 0)
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 0)
// 64-bit variant of the test above: stores the s_sendmsg_rtnl builtin result
// to *out; expected to lower to @llvm.amdgcn.s.sendmsg.rtn.i64(i32 0).
void test_s_sendmsg_rtnl(global ulong* out) {
*out = __builtin_amdgcn_s_sendmsg_rtnl(0);
}

// CHECK-LABEL: @test_ds_bvh_stack_rtn(
// CHECK: %0 = tail call { i32, i32 } @llvm.amdgcn.ds.bvh.stack.rtn(i32 %addr, i32 %data, <4 x i32> %data1, i32 128)
// CHECK: %0 = tail call{{.*}} { i32, i32 } @llvm.amdgcn.ds.bvh.stack.rtn(i32 %addr, i32 %data, <4 x i32> %data1, i32 128)
// CHECK: %1 = extractvalue { i32, i32 } %0, 0
// CHECK: %2 = extractvalue { i32, i32 } %0, 1
// CHECK: %3 = insertelement <2 x i32> poison, i32 %1, i64 0
Expand All @@ -36,19 +37,19 @@ void test_ds_bvh_stack_rtn(global uint2* out, uint addr, uint data, uint4 data1)
}

// CHECK-LABEL: @test_permlane64(
// CHECK: call i32 @llvm.amdgcn.permlane64(i32 %a)
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.permlane64(i32 %a)
// Stores the permlane64 builtin result to *out; expected to lower to
// @llvm.amdgcn.permlane64(i32 %a).
void test_permlane64(global uint* out, uint a) {
*out = __builtin_amdgcn_permlane64(a);
}

// CHECK-LABEL: @test_s_wait_event_export_ready
// CHECK: call void @llvm.amdgcn.s.wait.event.export.ready
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.wait.event.export.ready
// Calls the argument-less, void s_wait_event_export_ready builtin; expected to
// lower to a call of @llvm.amdgcn.s.wait.event.export.ready.
void test_s_wait_event_export_ready() {
__builtin_amdgcn_s_wait_event_export_ready();
}

// CHECK-LABEL: @test_global_add_f32
// CHECK: call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %{{.*}}, float %{{.*}})
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %{{.*}}, float %{{.*}})
// Passes the `global` pointer `addr` and `x` to the global atomic fadd builtin
// and stores the returned float through `rtn`; expected to lower to
// @llvm.amdgcn.global.atomic.fadd.f32.p1.f32 on an addrspace(1) pointer.
void test_global_add_f32(float *rtn, global float *addr, float x) {
*rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x);
}
55 changes: 34 additions & 21 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-vi.cl
Original file line number Diff line number Diff line change
Expand Up @@ -3,152 +3,165 @@
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx900 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1012 -emit-llvm -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -emit-llvm -o - %s | FileCheck %s

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

typedef unsigned long ulong;
typedef unsigned int uint;

// CHECK-LABEL: @test_div_fixup_f16
// CHECK: call half @llvm.amdgcn.div.fixup.f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.div.fixup.f16
// Stores the half-precision div_fixup builtin result to *out; expected to
// lower to @llvm.amdgcn.div.fixup.f16.
void test_div_fixup_f16(global half* out, half a, half b, half c)
{
*out = __builtin_amdgcn_div_fixuph(a, b, c);
}

// CHECK-LABEL: @test_rcp_f16
// CHECK: call half @llvm.amdgcn.rcp.f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rcp.f16
// Stores the half-precision rcp builtin result to *out; expected to lower to
// @llvm.amdgcn.rcp.f16.
void test_rcp_f16(global half* out, half a)
{
*out = __builtin_amdgcn_rcph(a);
}

// CHECK-LABEL: @test_sqrt_f16
// CHECK: call half @llvm.sqrt.f16
// CHECK: {{.*}}call{{.*}} half @llvm.{{((amdgcn.){0,1})}}sqrt.f16
// Stores the half-precision sqrt builtin result to *out. The wildcard form of
// the expectation above accepts either @llvm.sqrt.f16 or
// @llvm.amdgcn.sqrt.f16.
void test_sqrt_f16(global half* out, half a)
{
*out = __builtin_amdgcn_sqrth(a);
}

// CHECK-LABEL: @test_rsq_f16
// CHECK: call half @llvm.amdgcn.rsq.f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.rsq.f16
// Stores the half-precision rsq builtin result to *out; expected to lower to
// @llvm.amdgcn.rsq.f16.
void test_rsq_f16(global half* out, half a)
{
*out = __builtin_amdgcn_rsqh(a);
}

// CHECK-LABEL: @test_sin_f16
// CHECK: call half @llvm.amdgcn.sin.f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.sin.f16
// Stores the result of __builtin_amdgcn_sinh (the half-typed amdgcn sin
// builtin) to *out; expected to lower to @llvm.amdgcn.sin.f16.
void test_sin_f16(global half* out, half a)
{
*out = __builtin_amdgcn_sinh(a);
}

// CHECK-LABEL: @test_cos_f16
// CHECK: call half @llvm.amdgcn.cos.f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.cos.f16
// Stores the result of __builtin_amdgcn_cosh (the half-typed amdgcn cos
// builtin) to *out; expected to lower to @llvm.amdgcn.cos.f16.
void test_cos_f16(global half* out, half a)
{
*out = __builtin_amdgcn_cosh(a);
}

// CHECK-LABEL: @test_ldexp_f16
// CHECK: [[TRUNC:%[0-9a-z]+]] = trunc i32
// CHECK: call half @llvm.ldexp.f16.i16(half %a, i16 [[TRUNC]])
// CHECK: {{.*}}call{{.*}} half @llvm.ldexp.f16.i16(half %a, i16 [[TRUNC]])
// Stores the half-precision ldexp builtin result to *out. The expectations
// above require the int exponent `b` to be truncated (trunc i32) and passed as
// i16 to @llvm.ldexp.f16.i16.
void test_ldexp_f16(global half* out, half a, int b)
{
*out = __builtin_amdgcn_ldexph(a, b);
}

// CHECK-LABEL: @test_frexp_mant_f16
// CHECK: call half @llvm.amdgcn.frexp.mant.f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.frexp.mant.f16
// Stores the half-precision frexp_mant builtin result to *out; expected to
// lower to @llvm.amdgcn.frexp.mant.f16.
void test_frexp_mant_f16(global half* out, half a)
{
*out = __builtin_amdgcn_frexp_manth(a);
}

// CHECK-LABEL: @test_frexp_exp_f16
// CHECK: call i16 @llvm.amdgcn.frexp.exp.i16.f16
// CHECK: {{.*}}call{{.*}} i16 @llvm.amdgcn.frexp.exp.i16.f16
// Stores the half-precision frexp_exp builtin result to a `short` output;
// expected to lower to @llvm.amdgcn.frexp.exp.i16.f16 (i16 result).
void test_frexp_exp_f16(global short* out, half a)
{
*out = __builtin_amdgcn_frexp_exph(a);
}

// CHECK-LABEL: @test_fract_f16
// CHECK: call half @llvm.amdgcn.fract.f16
// CHECK: {{.*}}call{{.*}} half @llvm.amdgcn.fract.f16
// Stores the half-precision fract builtin result to *out; expected to lower to
// @llvm.amdgcn.fract.f16.
void test_fract_f16(global half* out, half a)
{
*out = __builtin_amdgcn_fracth(a);
}

// CHECK-LABEL: @test_class_f16
// CHECK: call i1 @llvm.amdgcn.class.f16
// CHECK: {{.*}}call{{.*}} i1 @llvm.amdgcn.class.f16
// Stores the half-precision class builtin result to *out; expected to lower to
// @llvm.amdgcn.class.f16, which yields an i1. Note that the result is written
// through a `half` pointer, so an implicit conversion is involved.
void test_class_f16(global half* out, half a, int b)
{
*out = __builtin_amdgcn_classh(a, b);
}

// CHECK-LABEL: @test_s_memrealtime
// CHECK: call i64 @llvm.amdgcn.s.memrealtime()
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memrealtime()
// Stores the 64-bit s_memrealtime builtin result to *out; expected to lower to
// @llvm.amdgcn.s.memrealtime() returning i64.
void test_s_memrealtime(global ulong* out)
{
*out = __builtin_amdgcn_s_memrealtime();
}

// CHECK-LABEL: @test_s_dcache_wb()
// CHECK: call void @llvm.amdgcn.s.dcache.wb()
// CHECK: {{.*}}call{{.*}} void @llvm.amdgcn.s.dcache.wb()
// Calls the argument-less, void s_dcache_wb builtin; expected to lower to
// @llvm.amdgcn.s.dcache.wb().
void test_s_dcache_wb()
{
__builtin_amdgcn_s_dcache_wb();
}

// CHECK-LABEL: @test_mov_dpp
// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %src, i32 0, i32 0, i32 0, i1 false)
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %src, i32 0, i32 0, i32 0, i1 false)
// Stores the mov_dpp builtin result to *out. The expectation above shows it
// lowering to @llvm.amdgcn.update.dpp.i32 with `poison` as the first operand
// and `src` as the second.
void test_mov_dpp(global int* out, int src)
{
*out = __builtin_amdgcn_mov_dpp(src, 0, 0, 0, false);
}

// CHECK-LABEL: @test_update_dpp
// CHECK: call i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.update.dpp.i32(i32 %arg1, i32 %arg2, i32 0, i32 0, i32 0, i1 false)
// Stores the update_dpp builtin result to *out; expected to lower to
// @llvm.amdgcn.update.dpp.i32 with arg1/arg2 followed by the literal operands.
void test_update_dpp(global int* out, int arg1, int arg2)
{
*out = __builtin_amdgcn_update_dpp(arg1, arg2, 0, 0, 0, false);
}

// CHECK-LABEL: @test_ds_fadd
// CHECK: call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %out, float %src, i32 0, i32 0, i1 false)
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %out, float %src, i32 0, i32 0, i1 false)
// Passes `out` and `src` to the ds_faddf builtin and stores the result back
// through `out`; expected to lower to @llvm.amdgcn.ds.fadd.f32 on an
// addrspace(3) pointer. When __SPIRV__ is defined the parameter is spelled
// with an explicit address_space(3) attribute instead of the OpenCL `local`
// qualifier.
#if !defined(__SPIRV__)
void test_ds_faddf(local float *out, float src) {
#else
void test_ds_faddf(__attribute__((address_space(3))) float *out, float src) {
#endif
*out = __builtin_amdgcn_ds_faddf(out, src, 0, 0, false);
}

// CHECK-LABEL: @test_ds_fmin
// CHECK: call float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %out, float %src, i32 0, i32 0, i1 false)
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.ds.fmin.f32(ptr addrspace(3) %out, float %src, i32 0, i32 0, i1 false)
// Passes `out` and `src` to the ds_fminf builtin and stores the result back
// through `out`; expected to lower to @llvm.amdgcn.ds.fmin.f32 on an
// addrspace(3) pointer. When __SPIRV__ is defined the parameter is spelled
// with an explicit address_space(3) attribute instead of the OpenCL `local`
// qualifier.
#if !defined(__SPIRV__)
void test_ds_fminf(local float *out, float src) {
#else
void test_ds_fminf(__attribute__((address_space(3))) float *out, float src) {
#endif
*out = __builtin_amdgcn_ds_fminf(out, src, 0, 0, false);
}

// CHECK-LABEL: @test_ds_fmax
// CHECK: call float @llvm.amdgcn.ds.fmax.f32(ptr addrspace(3) %out, float %src, i32 0, i32 0, i1 false)
// CHECK: {{.*}}call{{.*}} float @llvm.amdgcn.ds.fmax.f32(ptr addrspace(3) %out, float %src, i32 0, i32 0, i1 false)
// Passes `out` and `src` to the ds_fmaxf builtin and stores the result back
// through `out`; expected to lower to @llvm.amdgcn.ds.fmax.f32 on an
// addrspace(3) pointer. When __SPIRV__ is defined the parameter is spelled
// with an explicit address_space(3) attribute instead of the OpenCL `local`
// qualifier.
#if !defined(__SPIRV__)
void test_ds_fmaxf(local float *out, float src) {
#else
void test_ds_fmaxf(__attribute__((address_space(3))) float *out, float src) {
#endif
*out = __builtin_amdgcn_ds_fmaxf(out, src, 0, 0, false);
}

// CHECK-LABEL: @test_s_memtime
// CHECK: call i64 @llvm.amdgcn.s.memtime()
// CHECK: {{.*}}call{{.*}} i64 @llvm.amdgcn.s.memtime()
// Stores the 64-bit s_memtime builtin result to *out; expected to lower to
// @llvm.amdgcn.s.memtime() returning i64.
void test_s_memtime(global ulong* out)
{
*out = __builtin_amdgcn_s_memtime();
}

// CHECK-LABEL: @test_perm
// CHECK: call i32 @llvm.amdgcn.perm(i32 %a, i32 %b, i32 %s)
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.perm(i32 %a, i32 %b, i32 %s)
// Stores the perm builtin result to *out; expected to lower to
// @llvm.amdgcn.perm(i32 %a, i32 %b, i32 %s).
void test_perm(global uint* out, uint a, uint b, uint s)
{
*out = __builtin_amdgcn_perm(a, b, s);
}

// CHECK-LABEL: @test_groupstaticsize
// CHECK: call i32 @llvm.amdgcn.groupstaticsize()
// CHECK: {{.*}}call{{.*}} i32 @llvm.amdgcn.groupstaticsize()
void test_groupstaticsize(global uint* out)
{
*out = __builtin_amdgcn_groupstaticsize();
Expand Down
294 changes: 161 additions & 133 deletions clang/test/CodeGenOpenCL/builtins-amdgcn.cl

Large diffs are not rendered by default.

13 changes: 7 additions & 6 deletions clang/test/CodeGenOpenCL/inline-asm-amdgcn.cl
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -emit-llvm -O0 -o - -triple amdgcn %s | FileCheck %s
// RUN: %clang_cc1 -emit-llvm -O0 -o - -triple spirv64-amd-amdhsa %s | FileCheck %s

typedef float float32 __attribute__((ext_vector_type(32)));

// Volatile inline asm whose 64-bit output is bound to the explicit register
// range v[15:16] and whose input `arg0` uses the "v" constraint. The
// expectation requires the asm call to return i64 with that constraint string.
// NOTE(review): the expectation below appears twice, with and without the
// `{{.*}}` wildcard; this looks like old and new diff lines shown together.
kernel void test_long(int arg0) {
long v15_16;
// CHECK: call i64 asm sideeffect "v_lshlrev_b64 v[15:16], 0, $0", "={v[15:16]},v"
// CHECK: call{{.*}} i64 asm sideeffect "v_lshlrev_b64 v[15:16], 0, $0", "={v[15:16]},v"
__asm volatile("v_lshlrev_b64 v[15:16], 0, %0" : "={v[15:16]}"(v15_16) : "v"(arg0));
}

Expand All @@ -14,7 +15,7 @@ kernel void test_agpr() {
float reg_a;
float reg_b;
float32 reg_c;
// CHECK: call <32 x float> asm "v_mfma_f32_32x32x1f32 $0, $1, $2, $3", "=a,v,v,a,~{a0},~{a1},~{a2},~{a3},~{a4},~{a5},~{a6},~{a7},~{a8},~{a9},~{a10},~{a11},~{a12},~{a13},~{a14},~{a15},~{a16},~{a17},~{a18},~{a19},~{a20},~{a21},~{a22},~{a23},~{a24},~{a25},~{a26},~{a27},~{a28},~{a29},~{a30},~{a31}"
// CHECK: call{{.*}} <32 x float> asm "v_mfma_f32_32x32x1f32 $0, $1, $2, $3", "=a,v,v,a,~{a0},~{a1},~{a2},~{a3},~{a4},~{a5},~{a6},~{a7},~{a8},~{a9},~{a10},~{a11},~{a12},~{a13},~{a14},~{a15},~{a16},~{a17},~{a18},~{a19},~{a20},~{a21},~{a22},~{a23},~{a24},~{a25},~{a26},~{a27},~{a28},~{a29},~{a30},~{a31}"
__asm ("v_mfma_f32_32x32x1f32 %0, %1, %2, %3"
: "=a"(acc_c)
: "v"(reg_a), "v"(reg_b), "a"(reg_c)
Expand All @@ -23,12 +24,12 @@ kernel void test_agpr() {
"a16", "a17", "a18", "a19", "a20", "a21", "a22", "a23",
"a24", "a25", "a26", "a27", "a28", "a29", "a30", "a31");

// CHECK: call <32 x float> asm sideeffect "v_mfma_f32_32x32x1f32 a[0:31], $0, $1, a[0:31]", "={a[0:31]},v,v,{a[0:31]}"
// CHECK: call{{.*}} <32 x float> asm sideeffect "v_mfma_f32_32x32x1f32 a[0:31], $0, $1, a[0:31]", "={a[0:31]},v,v,{a[0:31]}"
__asm volatile("v_mfma_f32_32x32x1f32 a[0:31], %0, %1, a[0:31]"
: "={a[0:31]}"(acc_c)
: "v"(reg_a),"v"(reg_b), "{a[0:31]}"(reg_c));

// CHECK: call float asm "v_accvgpr_read_b32 $0, $1", "={a1},{a1}"
// CHECK: call{{.*}} float asm "v_accvgpr_read_b32 $0, $1", "={a1},{a1}"
__asm ("v_accvgpr_read_b32 %0, %1"
: "={a1}"(reg_a)
: "{a1}"(reg_b));
Expand All @@ -37,13 +38,13 @@ kernel void test_agpr() {
// Passes the 64-bit constant 0x200000001 to inline asm through the "DA" input
// constraint. The expectation requires it to reach the IR as the immediate
// i64 8589934593 with the constraint encoded as ^DA.
// NOTE(review): the expectation below appears twice, with and without the
// `{{.*}}` wildcard; this looks like old and new diff lines shown together.
kernel void test_constraint_DA() {
const long x = 0x200000001;
int res;
// CHECK: call i32 asm sideeffect "v_mov_b32 $0, $1 & 0xFFFFFFFF", "=v,^DA"(i64 8589934593)
// CHECK: call{{.*}} i32 asm sideeffect "v_mov_b32 $0, $1 & 0xFFFFFFFF", "=v,^DA"(i64 8589934593)
__asm volatile("v_mov_b32 %0, %1 & 0xFFFFFFFF" : "=v"(res) : "DA"(x));
}

// Passes the 64-bit constant 0x200000001 to inline asm through the "DB" input
// constraint. The expectation requires it to reach the IR as the immediate
// i64 8589934593 with the constraint encoded as ^DB.
// NOTE(review): the expectation below appears twice, with and without the
// `{{.*}}` wildcard; this looks like old and new diff lines shown together.
kernel void test_constraint_DB() {
const long x = 0x200000001;
int res;
// CHECK: call i32 asm sideeffect "v_mov_b32 $0, $1 & 0xFFFFFFFF", "=v,^DB"(i64 8589934593)
// CHECK: call{{.*}} i32 asm sideeffect "v_mov_b32 $0, $1 & 0xFFFFFFFF", "=v,^DB"(i64 8589934593)
__asm volatile("v_mov_b32 %0, %1 & 0xFFFFFFFF" : "=v"(res) : "DB"(x));
}
3 changes: 2 additions & 1 deletion clang/test/Driver/amdgpu-openmp-toolchain.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

// verify the tools invocations
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-llvm-bc"{{.*}}"-x" "c"
// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-target-cpu" "gfx906"{{.*}}"-fcuda-is-device"{{.*}}
// CHECK: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-unknown-linux-gnu"{{.*}}"-fcuda-is-device"{{.*}}"-target-cpu" "gfx906"
// CHECK: "-cc1" "-triple" "x86_64-unknown-linux-gnu"{{.*}}"-emit-obj"
// CHECK: clang-linker-wrapper{{.*}} "-o" "a.out"

Expand Down Expand Up @@ -63,6 +63,7 @@

// RUN: %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a:sramecc-:xnack+ \
// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-TARGET-ID
// CHECK-TARGET-ID: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}} "-target-cpu" "gfx90a" "-target-feature" "-sramecc" "-target-feature" "+xnack"
// CHECK-TARGET-ID: clang-offload-packager{{.*}}arch=gfx90a:sramecc-:xnack+,kind=openmp,feature=-sramecc,feature=+xnack

// RUN: not %clang -### -target x86_64-pc-linux-gnu -fopenmp --offload-arch=gfx90a,gfx90a:xnack+ \
Expand Down
6 changes: 6 additions & 0 deletions clang/test/Driver/baremetal-ld.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// RUN: %clang -### --target=armv7-unknown-none-eabi -mcpu=cortex-m4 --sysroot= -fuse-ld=ld %s 2>&1 | FileCheck --check-prefix=NOLTO %s
// NOLTO: {{".*ld.*"}} {{.*}}
// NOLTO-NOT: "-plugin-opt=mcpu"

// RUN: %clang -### --target=armv7-unknown-none-eabi -mcpu=cortex-m4 --sysroot= -fuse-ld=ld -flto -O3 %s 2>&1 | FileCheck --check-prefix=LTO %s
// LTO: {{".*ld.*"}} {{.*}} "-plugin-opt=mcpu=cortex-m4" "-plugin-opt=O3"
2 changes: 1 addition & 1 deletion clang/test/Driver/gcc-param.c
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
// RUN: touch %t.o
// RUN: %clang -Werror --param ssp-buffer-size=1 %t.o -###
// RUN: %clang -Werror -Wno-msvc-not-found --param ssp-buffer-size=1 %t.o -###
2 changes: 1 addition & 1 deletion clang/test/Misc/target-invalid-cpu-note.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Use CHECK-NEXT instead of multiple CHECK-SAME to ensure we will fail if there is anything extra in the output.
// RUN: not %clang_cc1 -triple armv5--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix ARM
// ARM: error: unknown target CPU 'not-a-cpu'
// ARM-NEXT: note: valid target CPU values are: arm8, arm810, strongarm, strongarm110, strongarm1100, strongarm1110, arm7tdmi, arm7tdmi-s, arm710t, arm720t, arm9, arm9tdmi, arm920, arm920t, arm922t, arm940t, ep9312, arm10tdmi, arm1020t, arm9e, arm946e-s, arm966e-s, arm968e-s, arm10e, arm1020e, arm1022e, arm926ej-s, arm1136j-s, arm1136jf-s, mpcore, mpcorenovfp, arm1176jz-s, arm1176jzf-s, arm1156t2-s, arm1156t2f-s, cortex-m0, cortex-m0plus, cortex-m1, sc000, cortex-a5, cortex-a7, cortex-a8, cortex-a9, cortex-a12, cortex-a15, cortex-a17, krait, cortex-r4, cortex-r4f, cortex-r5, cortex-r7, cortex-r8, cortex-r52, sc300, cortex-m3, cortex-m4, cortex-m7, cortex-m23, cortex-m33, cortex-m35p, cortex-m55, cortex-m85, cortex-m52, cortex-a32, cortex-a35, cortex-a53, cortex-a55, cortex-a57, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-x1, cortex-x1c, neoverse-n1, neoverse-n2, neoverse-v1, cyclone, exynos-m3, exynos-m4, exynos-m5, kryo, iwmmxt, xscale, swift{{$}}
// ARM-NEXT: note: valid target CPU values are: arm8, arm810, strongarm, strongarm110, strongarm1100, strongarm1110, arm7tdmi, arm7tdmi-s, arm710t, arm720t, arm9, arm9tdmi, arm920, arm920t, arm922t, arm940t, ep9312, arm10tdmi, arm1020t, arm9e, arm946e-s, arm966e-s, arm968e-s, arm10e, arm1020e, arm1022e, arm926ej-s, arm1136j-s, arm1136jf-s, mpcore, mpcorenovfp, arm1176jz-s, arm1176jzf-s, arm1156t2-s, arm1156t2f-s, cortex-m0, cortex-m0plus, cortex-m1, sc000, cortex-a5, cortex-a7, cortex-a8, cortex-a9, cortex-a12, cortex-a15, cortex-a17, krait, cortex-r4, cortex-r4f, cortex-r5, cortex-r7, cortex-r8, cortex-r52, cortex-r52plus, sc300, cortex-m3, cortex-m4, cortex-m7, cortex-m23, cortex-m33, cortex-m35p, cortex-m55, cortex-m85, cortex-m52, cortex-a32, cortex-a35, cortex-a53, cortex-a55, cortex-a57, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78ae, cortex-a78c, cortex-a710, cortex-x1, cortex-x1c, neoverse-n1, neoverse-n2, neoverse-v1, cyclone, exynos-m3, exynos-m4, exynos-m5, kryo, iwmmxt, xscale, swift{{$}}

// RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64
// AARCH64: error: unknown target CPU 'not-a-cpu'
Expand Down
112 changes: 112 additions & 0 deletions clang/test/Modules/no-transitive-decls-change.cppm
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Testing that changing a declaration in an unused module file won't change
// the BMI of the current module file.
//
// RUN: rm -rf %t
// RUN: split-file %s %t
//
// RUN: %clang_cc1 -std=c++20 %t/m-partA.cppm -emit-reduced-module-interface -o %t/m-partA.pcm
// RUN: %clang_cc1 -std=c++20 %t/m-partA.v1.cppm -emit-reduced-module-interface -o \
// RUN: %t/m-partA.v1.pcm
// RUN: %clang_cc1 -std=c++20 %t/m-partB.cppm -emit-reduced-module-interface -o %t/m-partB.pcm
// RUN: %clang_cc1 -std=c++20 %t/m.cppm -emit-reduced-module-interface -o %t/m.pcm \
// RUN: -fmodule-file=m:partA=%t/m-partA.pcm -fmodule-file=m:partB=%t/m-partB.pcm
// RUN: %clang_cc1 -std=c++20 %t/m.cppm -emit-reduced-module-interface -o %t/m.v1.pcm \
// RUN: -fmodule-file=m:partA=%t/m-partA.v1.pcm -fmodule-file=m:partB=%t/m-partB.pcm
//
// RUN: %clang_cc1 -std=c++20 %t/useBOnly.cppm -emit-reduced-module-interface -o %t/useBOnly.pcm \
// RUN: -fmodule-file=m=%t/m.pcm -fmodule-file=m:partA=%t/m-partA.pcm \
// RUN: -fmodule-file=m:partB=%t/m-partB.pcm
// RUN: %clang_cc1 -std=c++20 %t/useBOnly.cppm -emit-reduced-module-interface -o %t/useBOnly.v1.pcm \
// RUN: -fmodule-file=m=%t/m.v1.pcm -fmodule-file=m:partA=%t/m-partA.v1.pcm \
// RUN: -fmodule-file=m:partB=%t/m-partB.pcm
// Since useBOnly only uses partB from module M, the change in partA shouldn't affect
// useBOnly.
// RUN: diff %t/useBOnly.pcm %t/useBOnly.v1.pcm &> /dev/null

//--- m-partA.cppm
// Baseline interface partition m:partA.
// Exports getA() and getA2(int). The A_Impl helpers are not exported, and
// namespace A re-exposes only getAImpl through a using-declaration.
export module m:partA;

// Non-exported implementation helpers.
namespace A_Impl {
inline int getAImpl() {
return 43;
}

inline int getA2Impl() {
return 43;
}
}

// In the baseline, only getAImpl is pulled into namespace A.
namespace A {
using A_Impl::getAImpl;
}

export inline int getA() {
return 43;
}

export inline int getA2(int) {
return 88;
}

//--- m-partA.v1.cppm
// Modified variant of m:partA. The RUN lines compile it to m-partA.v1.pcm and
// use that file to build m.v1.pcm.
// NOTE(review): unlike the baseline partition, getA() and getA2(int) are not
// marked `export` in this variant - confirm that this is intentional.
export module m:partA;

// Non-exported implementation helpers (same as in the baseline partition).
namespace A_Impl {
inline int getAImpl() {
return 43;
}

inline int getA2Impl() {
return 43;
}
}

namespace A {
using A_Impl::getAImpl;
// Adding a new declaration without introducing a new declaration name.
using A_Impl::getA2Impl;
}

inline int getA() {
return 43;
}

inline int getA2(int) {
return 88;
}

// Now we add a new declaration without introducing a new identifier or new
// types. A consuming module that does not use m:partA at all is expected to
// remain unchanged.
inline int getA(int) {
return 88;
}

//--- m-partB.cppm
// Interface partition m:partB. A single source is used for both the baseline
// and the v1 builds of module m, and it exports only getB().
export module m:partB;

export inline int getB() {
return 430;
}

//--- m.cppm
// Primary interface unit of module m: re-exports both partitions, so that
// `import m;` makes the exported declarations of m:partA and m:partB visible.
export module m;
export import :partA;
export import :partB;

//--- useBOnly.cppm
// Consumer that imports m but only calls getB(), which is declared in m:partB.
// The RUN lines compile it once against m.pcm and once against m.v1.pcm and
// then diff the two resulting BMIs.
export module useBOnly;
import m;

export inline int get() {
return getB();
}

//--- useAOnly.cppm
export module useAOnly;
import m;

export inline int get() {
A<int> a;
return a.getValue();
}
3 changes: 3 additions & 0 deletions clang/test/Preprocessor/hash_builtin.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx906 -E %s -o - | FileCheck %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -E %s -o - | FileCheck %s --check-prefix=SPIRV-AMDGCN

// s_memtime builtin: the declaration guarded below must appear in the
// preprocessed output of both RUN lines.
// CHECK: has_s_memtime_inst
// SPIRV-AMDGCN: has_s_memtime_inst
#if __has_builtin(__builtin_amdgcn_s_memtime)
int has_s_memtime_inst;
#endif

// mov_dpp8 builtin: the declaration guarded below must be absent from the
// output of the gfx906 RUN line but present for the spirv64-amd-amdhsa one.
// NOTE(review): presumably the SPIR-V target exposes AMDGCN builtins
// regardless of GPU generation - confirm.
// CHECK-NOT: has_gfx10_inst
// SPIRV-AMDGCN: has_gfx10_inst
#if __has_builtin(__builtin_amdgcn_mov_dpp8)
int has_gfx10_inst;
#endif
1 change: 1 addition & 0 deletions clang/test/Preprocessor/predefined-macros-no-warnings.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@
// RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple spir64
// RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple spirv32
// RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple spirv64
// RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple spirv64-amd-amdhsa
// RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple wasm32
// RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple wasm32-wasi
// RUN: %clang_cc1 %s -Eonly -Wsystem-headers -Werror -triple wasm32-emscripten
Expand Down
10 changes: 10 additions & 0 deletions clang/test/Preprocessor/predefined-macros.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,16 @@
// CHECK-SPIRV64-DAG: #define __SPIRV64__ 1
// CHECK-SPIRV64-NOT: #define __SPIRV32__ 1

// RUN: %clang_cc1 %s -E -dM -o - -x cl -triple spirv64-amd-amdhsa \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-SPIRV64-AMDGCN
// CHECK-SPIRV64-AMDGCN-DAG: #define __IMAGE_SUPPORT__ 1
// CHECK-SPIRV64-AMDGCN-DAG: #define __SPIRV__ 1
// CHECK-SPIRV64-AMDGCN-DAG: #define __SPIRV64__ 1
// CHECK-SPIRV64-AMDGCN-DAG: #define __AMD__ 1
// CHECK-SPIRV64-AMDGCN-DAG: #define __AMDGCN__ 1
// CHECK-SPIRV64-AMDGCN-DAG: #define __AMDGPU__ 1
// CHECK-SPIRV64-AMDGCN-NOT: #define __SPIRV32__ 1

// RUN: %clang_cc1 %s -E -dM -o - -x hip -triple x86_64-unknown-linux-gnu \
// RUN: | FileCheck -match-full-lines %s --check-prefix=CHECK-HIP
// CHECK-HIP: #define __HIPCC__ 1
Expand Down
25 changes: 25 additions & 0 deletions clang/test/Sema/builtin-spirv-amdgcn-atomic-inc-dec-failure.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// RUN: %clang_cc1 %s -x hip -fcuda-is-device -o - \
// RUN: -triple=spirv64-amd-amdhsa -fsyntax-only \
// RUN: -verify=dev
// RUN: %clang_cc1 %s -x hip -triple x86_64 -o - \
// RUN: -aux-triple spirv64-amd-amdhsa -fsyntax-only \
// RUN: -verify=host

// dev-no-diagnostics

// Host function that calls the AMDGCN atomic increment/decrement builtins.
// In the host compilation (x86_64 with spirv64-amd-amdhsa as the aux triple)
// every call must be rejected as a reference to a __device__ function from a
// __host__ function. The device compilation must produce no diagnostics.
void test_host() {
__UINT32_TYPE__ val32;
__UINT64_TYPE__ val64;

// host-error@+1 {{reference to __device__ function '__builtin_amdgcn_atomic_inc32' in __host__ function}}
val32 = __builtin_amdgcn_atomic_inc32(&val32, val32, __ATOMIC_SEQ_CST, "");

// host-error@+1 {{reference to __device__ function '__builtin_amdgcn_atomic_inc64' in __host__ function}}
val64 = __builtin_amdgcn_atomic_inc64(&val64, val64, __ATOMIC_SEQ_CST, "");

// host-error@+1 {{reference to __device__ function '__builtin_amdgcn_atomic_dec32' in __host__ function}}
val32 = __builtin_amdgcn_atomic_dec32(&val32, val32, __ATOMIC_SEQ_CST, "");

// host-error@+1 {{reference to __device__ function '__builtin_amdgcn_atomic_dec64' in __host__ function}}
val64 = __builtin_amdgcn_atomic_dec64(&val64, val64, __ATOMIC_SEQ_CST, "");
}
111 changes: 111 additions & 0 deletions clang/test/Sema/inline-asm-validate-spirv-amdgcn.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -fsyntax-only -verify %s

#pragma OPENCL EXTENSION cl_khr_fp64 : enable

// Exercises AMDGCN inline-asm constraint validation on the spirv64-amd-amdhsa
// target: register-class constraints ('s', 'v'), named-register constraints
// ('{s1}', '{exec}') and the immediate constraints 'I', 'J', 'A', 'B', 'C',
// 'DA' and 'DB'. Lines without a trailing diagnostic directive must be
// accepted; malformed or out-of-range cases carry the error they must produce.
kernel void test () {

int sgpr = 0, vgpr = 0, imm = 0;

// sgpr constraints
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "s" (imm) : );

__asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}" (imm) : );
__asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exe" (imm) : ); // expected-error {{invalid input constraint '{exe' in asm}}
__asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec" (imm) : ); // expected-error {{invalid input constraint '{exec' in asm}}
__asm__ ("s_mov_b32 %0, %1" : "={s1}" (sgpr) : "{exec}a" (imm) : ); // expected-error {{invalid input constraint '{exec}a' in asm}}

// vgpr constraints
__asm__ ("v_mov_b32 %0, %1" : "=v" (vgpr) : "v" (imm) : );

// 'I' constraint (an immediate integer in the range -16 to 64)
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (imm) : );
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (-16) : );
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (64) : );
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (-17) : ); // expected-error {{value '-17' out of range for constraint 'I'}}
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "I" (65) : ); // expected-error {{value '65' out of range for constraint 'I'}}

// 'J' constraint (an immediate 16-bit signed integer)
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (imm) : );
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (-32768) : );
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (32767) : );
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (-32769) : ); // expected-error {{value '-32769' out of range for constraint 'J'}}
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "J" (32768) : ); // expected-error {{value '32768' out of range for constraint 'J'}}

// 'A' constraint (an immediate constant that can be inlined)
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "A" (imm) : );

// 'B' constraint (an immediate 32-bit signed integer)
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "B" (imm) : );

// 'C' constraint (an immediate 32-bit unsigned integer or 'A' constraint)
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "C" (imm) : );

// 'DA' constraint (an immediate 64-bit constant that can be split into two 'A' constants)
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "DA" (imm) : );

// 'DB' constraint (an immediate 64-bit constant that can be split into two 'B' constants)
__asm__ ("s_mov_b32 %0, %1" : "=s" (sgpr) : "DB" (imm) : );

}

// Validates the syntax of single physical-register constraints on float
// operands. Well-formed spellings ('{v1}', '{v[1]}', and the plain register
// class 'v') must be accepted. Empty, truncated, unbalanced or otherwise
// malformed spellings must be diagnosed as invalid output/input constraints.
__kernel void
test_float(const __global float *a, const __global float *b, __global float *c, unsigned i)
{
float ai = a[i];
float bi = b[i];
float ci;

// Output constraints written as '{vN}' and malformed variants of that form.
__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : );
__asm("v_add_f32_e32 v1, v2, v3" : ""(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '' in asm}}
__asm("v_add_f32_e32 v1, v2, v3" : "="(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=' in asm}}
__asm("v_add_f32_e32 v1, v2, v3" : "={a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={a}' in asm}}
__asm("v_add_f32_e32 v1, v2, v3" : "={"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={' in asm}}
__asm("v_add_f32_e32 v1, v2, v3" : "={}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={}' in asm}}
__asm("v_add_f32_e32 v1, v2, v3" : "={v"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v' in asm}}
__asm("v_add_f32_e32 v1, v2, v3" : "={v1a}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1a}' in asm}}
__asm("v_add_f32_e32 v1, v2, v3" : "={va}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={va}' in asm}}
__asm("v_add_f32_e32 v1, v2, v3" : "={v1}a"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1}a' in asm}}
__asm("v_add_f32_e32 v1, v2, v3" : "={v1"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '={v1' in asm}}
__asm("v_add_f32_e32 v1, v2, v3" : "=v1}"(ci) : "{v2}"(ai), "{v3}"(bi) : ); // expected-error {{invalid output constraint '=v1}' in asm}}

// Bracketed single-register form '{v[N]}' and malformed variants.
__asm("v_add_f32_e32 v1, v2, v3" : "={v[1]}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : );
__asm("v_add_f32_e32 v1, v2, v3" : "={v[1}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[1}' in asm}}
__asm("v_add_f32_e32 v1, v2, v3" : "={v[1]"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[1]' in asm}}
__asm("v_add_f32_e32 v1, v2, v3" : "={v[a]}"(ci) : "{v[2]}"(ai), "{v[3]}"(bi) : ); // expected-error {{invalid output constraint '={v[a]}' in asm}}

// Register-class form: 'v' is valid, but a register number without braces is not.
__asm("v_add_f32_e32 v1, v2, v3" : "=v"(ci) : "v"(ai), "v"(bi) : );
__asm("v_add_f32_e32 v1, v2, v3" : "=v1"(ci) : "v2"(ai), "v3"(bi) : ); /// expected-error {{invalid output constraint '=v1' in asm}}

// Malformed input constraints, in either input position.
__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{a}"(ai), "{v3}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}}
__asm("v_add_f32_e32 v1, v2, v3" : "={v1}"(ci) : "{v2}"(ai), "{a}"(bi) : ); // expected-error {{invalid input constraint '{a}' in asm}}
c[i] = ci;
}

// Validates the syntax of register-range constraints ('{v[M:N]}') on double
// operands. The well-formed range '{v[1:2]}' must be accepted. Misplaced
// braces, trailing characters, missing bounds, unbalanced brackets, a
// descending range ('v[2:1]') and an unbraced range must all be diagnosed.
__kernel void
test_double(const __global double *a, const __global double *b, __global double *c, unsigned i)
{
double ai = a[i];
double bi = b[i];
double ci;

__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : );
__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v{[1:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '=v{[1:2]}' in asm}}
__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]a}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2]a}' in asm}}
__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]}a"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2]}a' in asm}}
__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:' in asm}}
__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:]}' in asm}}
__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[:2]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[:2]}' in asm}}
__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2]"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2]' in asm}}
__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[1:2}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[1:2}' in asm}}
__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "={v[2:1]}"(ci) : "{v[3:4]}"(ai), "{v[5:6]}"(bi) : ); //expected-error {{invalid output constraint '={v[2:1]}' in asm}}

// A register range without the surrounding braces is rejected as well.
__asm("v_add_f64_e64 v[1:2], v[3:4], v[5:6]" : "=v[1:2]"(ci) : "v[3:4]"(ai), "v[5:6]"(bi) : ); //expected-error {{invalid output constraint '=v[1:2]' in asm}}

c[i] = ci;
}

// Binds a `long` output to the register range v[15:16] with an int input in
// the 'v' register class. The line carries no diagnostic directive, so it
// must be accepted.
void test_long(int arg0) {
long v15_16;
__asm volatile("v_lshlrev_b64 v[15:16], 0, %0" : "={v[15:16]}"(v15_16) : "v"(arg0));
}
3 changes: 3 additions & 0 deletions clang/test/SemaCUDA/allow-int128.cu
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -aux-triple x86_64-unknown-linux-gnu \
// RUN: -fcuda-is-device -verify -fsyntax-only %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa \
// RUN: -aux-triple x86_64-unknown-linux-gnu \
// RUN: -fcuda-is-device -verify -fsyntax-only %s
// RUN: %clang_cc1 -triple nvptx \
// RUN: -aux-triple x86_64-unknown-linux-gnu \
// RUN: -fcuda-is-device -verify -fsyntax-only %s
Expand Down
1 change: 1 addition & 0 deletions clang/test/SemaCUDA/amdgpu-f128.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -fsyntax-only -verify %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -fcuda-is-device -fsyntax-only -verify %s

// expected-no-diagnostics
typedef __float128 f128_t;
1 change: 1 addition & 0 deletions clang/test/SemaCUDA/float16.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple amdgcn -verify %s
// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple spirv64-amd-amdhsa -verify %s
// RUN: %clang_cc1 -fsyntax-only -triple x86_64 -aux-triple nvptx64 -verify %s
// expected-no-diagnostics
#include "Inputs/cuda.h"
Expand Down
1 change: 1 addition & 0 deletions clang/test/SemaCUDA/fp16-arg-return.cu
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -o - -triple amdgcn-amd-amdhsa -fcuda-is-device -fsyntax-only -verify %s
// RUN: %clang_cc1 -o - -triple spirv64-amd-amdhsa -fcuda-is-device -fsyntax-only -verify %s

// expected-no-diagnostics

Expand Down
86 changes: 86 additions & 0 deletions clang/test/SemaCUDA/spirv-amdgcn-atomic-ops.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// RUN: %clang_cc1 -x hip -std=c++11 -triple spirv64-amd-amdhsa -fcuda-is-device -verify -fsyntax-only %s

#include "Inputs/cuda.h"

// Semantic checks for __hip_atomic_load(ptr, order, scope) on the
// spirv64-amd-amdhsa device target:
//  - wrong argument counts and a non-pointer address are errors;
//  - scope values outside the __HIP_MEMORY_SCOPE_* set (0 and 6 here) are
//    errors;
//  - __ATOMIC_ACQ_REL is diagnosed with a warning as an invalid order for a
//    load;
//  - int, unsigned, long long, unsigned long long, float and double pointers
//    are all accepted as the address argument.
__device__ int test_hip_atomic_load(int *pi32, unsigned int *pu32, long long *pll, unsigned long long *pull, float *fp, double *dbl) {
int val = __hip_atomic_load(0); // expected-error {{too few arguments to function call, expected 3, have 1}}
val = __hip_atomic_load(0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 3, have 4}}
val = __hip_atomic_load(0, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
val = __hip_atomic_load(pi32, 0, 0); // expected-error {{synchronization scope argument to atomic operation is invalid}}
val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, 6); // expected-error {{synchronization scope argument to atomic operation is invalid}}
val = __hip_atomic_load(pi32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_load(pi32, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_load(pi32, __ATOMIC_CONSUME, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_load(pi32, __ATOMIC_ACQUIRE, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_load(pi32, __ATOMIC_ACQ_REL, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning{{memory order argument to atomic operation is invalid}}
val = __hip_atomic_load(pu32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_load(pll, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_load(pull, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_load(fp, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
val = __hip_atomic_load(dbl, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
return val;
}

// Semantic checks for __hip_atomic_store(ptr, value, order, scope) on the
// spirv64-amd-amdhsa device target:
//  - wrong argument counts and a non-pointer address are errors;
//  - scope values outside the __HIP_MEMORY_SCOPE_* set (0 and 6 here) are
//    errors;
//  - __ATOMIC_CONSUME, __ATOMIC_ACQUIRE and __ATOMIC_ACQ_REL are diagnosed
//    with a warning as invalid orders for a store;
//  - matching and mismatching pointee/value type combinations (for example an
//    int value stored through a float pointer) are accepted without
//    diagnostics.
__device__ int test_hip_atomic_store(int *pi32, unsigned int *pu32, long long *pll, unsigned long long *pull, float *fp, double *dbl,
int i32, unsigned int u32, long long i64, unsigned long long u64, float f32, double f64) {
__hip_atomic_store(0); // expected-error {{too few arguments to function call, expected 4, have 1}}
__hip_atomic_store(0, 0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 4, have 5}}
__hip_atomic_store(0, 0, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
__hip_atomic_store(pi32, 0, 0, 0); // expected-error {{synchronization scope argument to atomic operation is invalid}}
__hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
__hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
__hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
__hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
__hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, 6); // expected-error {{synchronization scope argument to atomic operation is invalid}}
__hip_atomic_store(pi32, 0, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(pi32, 0, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(pi32, 0, __ATOMIC_CONSUME, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning{{memory order argument to atomic operation is invalid}}
__hip_atomic_store(pi32, 0, __ATOMIC_ACQUIRE, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning{{memory order argument to atomic operation is invalid}}
__hip_atomic_store(pi32, 0, __ATOMIC_ACQ_REL, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning{{memory order argument to atomic operation is invalid}}
__hip_atomic_store(pi32, i32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(pi32, i32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(pu32, u32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(pll, i64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(pull, u64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(fp, f32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(dbl, f64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(pi32, u32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(pi32, i64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(pi32, u64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(pll, i32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(fp, i32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(fp, i64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(dbl, i64, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
__hip_atomic_store(dbl, i32, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
return 0;
}

// Semantic checks for __hip_atomic_compare_exchange_weak(ptr, expected,
// desired, success_order, failure_order, scope) on the spirv64-amd-amdhsa
// device target:
//  - wrong argument counts and a non-pointer address are errors;
//  - an invalid scope value is an error;
//  - passing a null `expected` pointer triggers a non-null warning;
//  - __ATOMIC_ACQ_REL as the failure order is diagnosed with a warning, while
//    every success order used here is accepted.
__device__ bool test_hip_atomic_cmpxchg_weak(int *ptr, int val, int desired) {
bool flag = __hip_atomic_compare_exchange_weak(0); // expected-error {{too few arguments to function call, expected 6, have 1}}
flag = __hip_atomic_compare_exchange_weak(0, 0, 0, 0, 0, 0, 0); // expected-error {{too many arguments to function call, expected 6, have 7}}
flag = __hip_atomic_compare_exchange_weak(0, 0, 0, 0, 0, 0); // expected-error {{address argument to atomic builtin must be a pointer ('int' invalid)}}
flag = __hip_atomic_compare_exchange_weak(ptr, 0, 0, 0, 0, 0); // expected-error {{synchronization scope argument to atomic operation is invalid}}, expected-warning {{null passed to a callee that requires a non-null argument}}
flag = __hip_atomic_compare_exchange_weak(ptr, 0, 0, 0, 0, __HIP_MEMORY_SCOPE_SYSTEM); // expected-warning {{null passed to a callee that requires a non-null argument}}
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_CONSUME, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST, __HIP_MEMORY_SCOPE_SINGLETHREAD);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_CONSUME, __HIP_MEMORY_SCOPE_SINGLETHREAD);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_ACQUIRE, __HIP_MEMORY_SCOPE_SINGLETHREAD);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_ACQ_REL, __HIP_MEMORY_SCOPE_SINGLETHREAD); // expected-warning {{failure memory order argument to atomic operation is invalid}}
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_CONSUME, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
return flag;
}
2 changes: 1 addition & 1 deletion clang/test/SemaCXX/PR8755.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ struct A {

template <typename T>
void f() {
class A <T> ::iterator foo; // expected-error{{typedef 'iterator' cannot be referenced with a class specifier}}
class A <T> ::iterator foo; // expected-error{{typedef 'iterator' cannot be referenced with the 'class' specifier}}
}

void g() {
Expand Down
2 changes: 1 addition & 1 deletion clang/test/SemaCXX/using-decl-templates.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ namespace aliastemplateinst {
template<typename T> struct A { };
template<typename T> using APtr = A<T*>; // expected-note{{previous use is here}}

template struct APtr<int>; // expected-error{{type alias template 'APtr' cannot be referenced with a struct specifier}}
template struct APtr<int>; // expected-error{{alias template 'APtr' cannot be referenced with the 'struct' specifier}}
}

namespace DontDiagnoseInvalidTest {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx908 -verify -S -o - %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -verify -S -o - %s

#pragma OPENCL EXTENSION cl_khr_fp64:enable

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx90a -verify -S -o - %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -verify -S -o - %s

#pragma OPENCL EXTENSION cl_khr_fp64:enable

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx940 -verify -S -o - %s
// RUN: %clang_cc1 -triple spirv64-amd-amdhsa -verify -S -o - %s

typedef float v2f __attribute__((ext_vector_type(2)));
typedef float v4f __attribute__((ext_vector_type(4)));
Expand Down
16 changes: 16 additions & 0 deletions clang/test/SemaTemplate/cwg2398.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,19 @@ namespace consistency {
// new-error@-1 {{ambiguous partial specializations}}
} // namespace t2
} // namespace consistency

// Regression test: overload resolution between two function templates that
// both take a variadic template template parameter
// (`template <typename...> class MapType`), called with the two-parameter
// class template `map`. The second overload constrains the mapped type to
// foo<Value>.
// NOTE(review): no diagnostic directive is attached to the call, so it is
// presumably required to compile cleanly (selecting the foo<Value> overload);
// confirm against the RUN lines of this file.
namespace regression1 {
template <typename T, typename Y> struct map {};
template <typename T> class foo {};

template <template <typename...> class MapType, typename Value>
Value bar(MapType<int, Value> map);

template <template <typename...> class MapType, typename Value>
Value bar(MapType<int, foo<Value>> map);

void aux() {
map<int, foo<int>> input;
bar(input);
}
} // namespace regression1
2 changes: 1 addition & 1 deletion clang/test/SemaTemplate/template-id-expr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ template <template <typename> class D>
class E {
template class D<C>; // expected-error {{expected '<' after 'template'}}
template<> class D<C>; // expected-error {{cannot specialize a template template parameter}}
friend class D<C>; // expected-error {{type alias template 'D' cannot be referenced with a class specifier}}
friend class D<C>; // expected-error {{alias template 'D' cannot be referenced with the 'class' specifier}}
};
#if __cplusplus <= 199711L
// expected-warning@+2 {{extension}}
Expand Down
4 changes: 4 additions & 0 deletions clang/unittests/Interpreter/InterpreterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,9 @@ TEST_F(InterpreterTest, InstantiateTemplate) {
EXPECT_EQ(42, fn(NewA.getPtr()));
}

// This test exposes an ARM specific problem in the interpreter, see
// https://github.com/llvm/llvm-project/issues/94741.
#ifndef __arm__
TEST_F(InterpreterTest, Value) {
std::unique_ptr<Interpreter> Interp = createInterpreter();

Expand Down Expand Up @@ -379,5 +382,6 @@ TEST_F(InterpreterTest, Value) {
EXPECT_EQ(V9.getKind(), Value::K_PtrOrObj);
EXPECT_TRUE(V9.isManuallyAlloc());
}
#endif /* ifndef __arm__ */

} // end anonymous namespace
17 changes: 15 additions & 2 deletions clang/unittests/Tooling/Syntax/TokensTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -374,11 +374,24 @@ TEST_F(TokenCollectorTest, Locations) {

auto StartLoc = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID());
for (auto &R : Code.ranges()) {
EXPECT_THAT(Buffer.spelledTokenAt(StartLoc.getLocWithOffset(R.Begin)),
Pointee(RangeIs(R)));
EXPECT_THAT(
Buffer.spelledTokenContaining(StartLoc.getLocWithOffset(R.Begin)),
Pointee(RangeIs(R)));
}
}

// Looks up a spelled token from a location that is not the token's first
// character: the `^` point lies inside the identifier marked by `[[ ]]`, and
// the lookup must yield the token covering that whole marked range.
TEST_F(TokenCollectorTest, LocationInMiddleOfSpelledToken) {
  llvm::Annotations Code(R"cpp(
int foo = [[baa^aar]];
)cpp");
  recordTokens(Code.code());

  // Turn the annotated point (an offset into the code) into a location
  // relative to the start of the main file.
  const auto MainFileStart =
      SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID());
  const auto PointLoc = MainFileStart.getLocWithOffset(Code.point());

  // Check spelled tokens.
  EXPECT_THAT(Buffer.spelledTokenContaining(PointLoc),
              Pointee(RangeIs(Code.range())));
}

TEST_F(TokenCollectorTest, MacroDirectives) {
// Macro directives are not stored anywhere at the moment.
std::string Code = R"cpp(
Expand Down
9 changes: 8 additions & 1 deletion compiler-rt/test/lit.common.cfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,13 @@ def is_binutils_lto_supported():
return True


def is_lld_lto_supported():
    """Return True when an `lld` entry exists in `config.llvm_tools_dir`.

    LLD itself supports LTO, but support is only claimed when LLD was built
    together with the latest changes; an older copy of LLD may not understand
    new bitcode versions.
    """
    lld_path = os.path.join(config.llvm_tools_dir, "lld")
    return os.path.exists(lld_path)


def is_windows_lto_supported():
if not target_is_msvc:
return True
Expand All @@ -755,7 +762,7 @@ def is_windows_lto_supported():
config.lto_flags = ["-Wl,-lto_library," + liblto_path()]
elif config.host_os in ["Linux", "FreeBSD", "NetBSD"]:
config.lto_supported = False
if config.use_lld:
if config.use_lld and is_lld_lto_supported():
config.lto_supported = True
if is_binutils_lto_supported():
config.available_features.add("binutils_lto")
Expand Down
12 changes: 11 additions & 1 deletion flang/include/flang/Optimizer/Builder/BoxValue.h
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,8 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &, const ExtendedValue &);
/// substituted.
ExtendedValue substBase(const ExtendedValue &exv, mlir::Value base);

/// Is the extended value `exv` an array?
/// Is the extended value `exv` an array? Note that this returns true for
/// assumed-ranks that could actually be scalars at runtime.
bool isArray(const ExtendedValue &exv);

/// Get the type parameters for `exv`.
Expand Down Expand Up @@ -527,6 +528,15 @@ class ExtendedValue : public details::matcher<ExtendedValue> {
[](const auto &box) -> bool { return false; });
}

/// Does this extended value have assumed rank? The question is forwarded to
/// the held value when it is a `fir::BoxValue` or a `fir::MutableBoxValue`;
/// every other alternative answers false.
bool hasAssumedRank() const {
return match(
[](const fir::BoxValue &box) -> bool { return box.hasAssumedRank(); },
[](const fir::MutableBoxValue &box) -> bool {
return box.hasAssumedRank();
},
// Catch-all for the remaining alternatives.
[](const auto &box) -> bool { return false; });
}

/// LLVM style debugging of extended values
LLVM_DUMP_METHOD void dump() const { llvm::errs() << *this << '\n'; }

Expand Down
1 change: 1 addition & 0 deletions flang/include/flang/Optimizer/Transforms/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ namespace fir {
#define GEN_PASS_DECL_OMPMAPINFOFINALIZATIONPASS
#define GEN_PASS_DECL_OMPMARKDECLARETARGETPASS
#define GEN_PASS_DECL_OMPFUNCTIONFILTERING
#define GEN_PASS_DECL_VSCALEATTR
#include "flang/Optimizer/Transforms/Passes.h.inc"

std::unique_ptr<mlir::Pass> createAffineDemotionPass();
Expand Down
1 change: 0 additions & 1 deletion flang/include/flang/Optimizer/Transforms/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,6 @@ def VScaleAttr : Pass<"vscale-attr", "mlir::func::FuncOp"> {
"std::pair<unsigned, unsigned>", /*default=*/"std::pair<unsigned, unsigned>{}",
"vector scale range">,
];
let constructor = "::fir::createVScaleAttrPass()";
}

def FunctionAttr : Pass<"function-attr", "mlir::func::FuncOp"> {
Expand Down
9 changes: 7 additions & 2 deletions flang/include/flang/Runtime/inquiry.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,18 @@ extern "C" {

std::int64_t RTDECL(LboundDim)(const Descriptor &array, int dim,
const char *sourceFile = nullptr, int line = 0);
void RTDECL(Ubound)(Descriptor &result, const Descriptor &array, int kind,
const char *sourceFile = nullptr, int line = 0);

void RTDECL(Shape)(void *result, const Descriptor &array, int kind);

std::int64_t RTDECL(Size)(
const Descriptor &array, const char *sourceFile = nullptr, int line = 0);

std::int64_t RTDECL(SizeDim)(const Descriptor &array, int dim,
const char *sourceFile = nullptr, int line = 0);

void RTDECL(Ubound)(Descriptor &result, const Descriptor &array, int kind,
const char *sourceFile = nullptr, int line = 0);

} // extern "C"
} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_INQUIRY_H_
2 changes: 1 addition & 1 deletion flang/include/flang/Tools/CLOptions.inc
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ inline void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm,
fir::createDebugPasses(pm, config.DebugInfo, config.OptLevel, inputFilename);

if (config.VScaleMin != 0)
pm.addPass(fir::createVScaleAttrPass({config.VScaleMin, config.VScaleMax}));
pm.addPass(fir::createVScaleAttr({{config.VScaleMin, config.VScaleMax}}));

// Add function attributes
fir::FunctionAttrTypes functionAttrs;
Expand Down
21 changes: 7 additions & 14 deletions flang/lib/Lower/OpenMP/DataSharingProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,13 @@ DataSharingProcessor::DataSharingProcessor(
}

void DataSharingProcessor::processStep1(
mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms) {
mlir::omp::PrivateClauseOps *clauseOps) {
collectSymbolsForPrivatization();
collectDefaultSymbols();
collectImplicitSymbols();
collectPreDeterminedSymbols();

privatize(clauseOps, privateSyms);
privatize(clauseOps);

insertBarrier();
}
Expand Down Expand Up @@ -415,16 +414,14 @@ void DataSharingProcessor::collectPreDeterminedSymbols() {
preDeterminedSymbols);
}

void DataSharingProcessor::privatize(
mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms) {
void DataSharingProcessor::privatize(mlir::omp::PrivateClauseOps *clauseOps) {
for (const semantics::Symbol *sym : allPrivatizedSymbols) {
if (const auto *commonDet =
sym->detailsIf<semantics::CommonBlockDetails>()) {
for (const auto &mem : commonDet->objects())
doPrivatize(&*mem, clauseOps, privateSyms);
doPrivatize(&*mem, clauseOps);
} else
doPrivatize(sym, clauseOps, privateSyms);
doPrivatize(sym, clauseOps);
}
}

Expand All @@ -441,9 +438,8 @@ void DataSharingProcessor::copyLastPrivatize(mlir::Operation *op) {
}
}

void DataSharingProcessor::doPrivatize(
const semantics::Symbol *sym, mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms) {
void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym,
mlir::omp::PrivateClauseOps *clauseOps) {
if (!useDelayedPrivatization) {
cloneSymbol(sym);
copyFirstPrivateSymbol(sym);
Expand Down Expand Up @@ -548,9 +544,6 @@ void DataSharingProcessor::doPrivatize(
clauseOps->privateVars.push_back(hsb.getAddr());
}

if (privateSyms)
privateSyms->push_back(sym);

symToPrivatizer[sym] = privatizerOp;
}

Expand Down
18 changes: 9 additions & 9 deletions flang/lib/Lower/OpenMP/DataSharingProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,18 +105,15 @@ class DataSharingProcessor {
void collectDefaultSymbols();
void collectImplicitSymbols();
void collectPreDeterminedSymbols();
void privatize(mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
void privatize(mlir::omp::PrivateClauseOps *clauseOps);
void defaultPrivatize(
mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
void implicitPrivatize(
mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
void
doPrivatize(const semantics::Symbol *sym,
mlir::omp::PrivateClauseOps *clauseOps,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms);
void doPrivatize(const semantics::Symbol *sym,
mlir::omp::PrivateClauseOps *clauseOps);
void copyLastPrivatize(mlir::Operation *op);
void insertLastPrivateCompare(mlir::Operation *op);
void cloneSymbol(const semantics::Symbol *sym);
Expand Down Expand Up @@ -147,15 +144,18 @@ class DataSharingProcessor {
// Step2 performs the copying for lastprivates and requires knowledge of the
// MLIR operation to insert the last private update. Step2 adds
// deallocation code as well.
void processStep1(
mlir::omp::PrivateClauseOps *clauseOps = nullptr,
llvm::SmallVectorImpl<const semantics::Symbol *> *privateSyms = nullptr);
void processStep1(mlir::omp::PrivateClauseOps *clauseOps = nullptr);
void processStep2(mlir::Operation *op, bool isLoop);

void setLoopIV(mlir::Value iv) {
assert(!loopIV && "Loop iteration variable already set");
loopIV = iv;
}

/// Read-only access to `allPrivatizedSymbols`, the set of symbols that
/// `privatize()` iterates over. The reference is to a member of this object.
const llvm::SetVector<const semantics::Symbol *> &
getAllSymbolsToPrivatize() const {
return allPrivatizedSymbols;
}
};

} // namespace omp
Expand Down
118 changes: 94 additions & 24 deletions flang/lib/Lower/OpenMP/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,33 @@ markDeclareTarget(mlir::Operation *op, lower::AbstractConverter &converter,
declareTargetOp.setDeclareTarget(deviceType, captureClause);
}

/// For an operation that takes `omp.private` values as region args, this util
/// merges the private vars info into the region arguments list.
///
/// \tparam OMPOp - the OpenMP op that takes `omp.private` inputs.
/// \tparam InfoTy - the type of private info we want to merge; e.g. mlir::Type
/// or mlir::Location fields of the private var list.
///
/// \param [in] op - the op accepting `omp.private` inputs.
/// \param [in] currentList - the current list of region info that we
/// want to merge private info with. For example this could be the list of types
/// or locations of previous arguments to \p op's region.
/// \param [in] infoAccessor - for a private variable, this returns the
/// data we want to merge: type or location.
/// \param [out] allRegionArgsInfo - the merged list of region info. Entries
/// are appended to whatever the list already holds: first every element of
/// \p currentList, then one entry per private variable of \p op, in order.
template <typename OMPOp, typename InfoTy>
static void
mergePrivateVarsInfo(OMPOp op, llvm::ArrayRef<InfoTy> currentList,
                     llvm::function_ref<InfoTy(mlir::Value)> infoAccessor,
                     llvm::SmallVectorImpl<InfoTy> &allRegionArgsInfo) {
  mlir::OperandRange privateVars = op.getPrivateVars();

  // The final size is known up front, so grow the output once.
  allRegionArgsInfo.reserve(allRegionArgsInfo.size() + currentList.size() +
                            privateVars.size());

  // The existing region info is copied verbatim ...
  allRegionArgsInfo.append(currentList.begin(), currentList.end());
  // ... followed by the info extracted from each private variable.
  llvm::transform(privateVars, std::back_inserter(allRegionArgsInfo),
                  infoAccessor);
}

//===----------------------------------------------------------------------===//
// Op body generation helper structures and functions
//===----------------------------------------------------------------------===//
Expand Down Expand Up @@ -758,15 +785,28 @@ genBodyOfTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
llvm::ArrayRef<const semantics::Symbol *> mapSyms,
llvm::ArrayRef<mlir::Location> mapSymLocs,
llvm::ArrayRef<mlir::Type> mapSymTypes,
DataSharingProcessor &dsp,
const mlir::Location &currentLocation,
const ConstructQueue &queue, ConstructQueue::iterator item) {
assert(mapSymTypes.size() == mapSymLocs.size());

fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::Region &region = targetOp.getRegion();

auto *regionBlock =
firOpBuilder.createBlock(&region, {}, mapSymTypes, mapSymLocs);
llvm::SmallVector<mlir::Type> allRegionArgTypes;
mergePrivateVarsInfo(targetOp, mapSymTypes,
llvm::function_ref<mlir::Type(mlir::Value)>{
[](mlir::Value v) { return v.getType(); }},
allRegionArgTypes);

llvm::SmallVector<mlir::Location> allRegionArgLocs;
mergePrivateVarsInfo(targetOp, mapSymLocs,
llvm::function_ref<mlir::Location(mlir::Value)>{
[](mlir::Value v) { return v.getLoc(); }},
allRegionArgLocs);

auto *regionBlock = firOpBuilder.createBlock(&region, {}, allRegionArgTypes,
allRegionArgLocs);

// Clones the `bounds` placing them inside the target region and returns them.
auto cloneBound = [&](mlir::Value bound) {
Expand Down Expand Up @@ -830,6 +870,20 @@ genBodyOfTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
});
}

for (auto [argIndex, argSymbol] :
llvm::enumerate(dsp.getAllSymbolsToPrivatize())) {
argIndex = mapSyms.size() + argIndex;

const mlir::BlockArgument &arg = region.getArgument(argIndex);
converter.bindSymbol(*argSymbol,
hlfir::translateToExtendedValue(
currentLocation, firOpBuilder, hlfir::Entity{arg},
/*contiguousHint=*/
evaluate::IsSimplyContiguous(
*argSymbol, converter.getFoldingContext()))
.first);
}

// Check if cloning the bounds introduced any dependency on the outer region.
// If so, then either clone them as well if they are MemoryEffectFree, or else
// copy them to a new temporary and add them to the map and block_argument
Expand Down Expand Up @@ -907,6 +961,8 @@ genBodyOfTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
} else {
genNestedEvaluations(converter, eval);
}

dsp.processStep2(targetOp, /*isLoop=*/false);
}

template <typename OpTy, typename... Args>
Expand Down Expand Up @@ -1048,15 +1104,18 @@ static void genTargetClauses(
devicePtrSyms);
cp.processMap(loc, stmtCtx, clauseOps, &mapSyms, &mapLocs, &mapTypes);
cp.processThreadLimit(stmtCtx, clauseOps);
// TODO Support delayed privatization.

if (processHostOnlyClauses)
cp.processNowait(clauseOps);

cp.processTODO<clause::Allocate, clause::Defaultmap, clause::Firstprivate,
clause::InReduction, clause::Private, clause::Reduction,
clause::InReduction, clause::Reduction,
clause::UsesAllocators>(loc,
llvm::omp::Directive::OMPD_target);

// `target private(..)` is only supported in delayed privatization mode.
if (!enableDelayedPrivatizationStaging)
cp.processTODO<clause::Private>(loc, llvm::omp::Directive::OMPD_target);
}

static void genTargetDataClauses(
Expand Down Expand Up @@ -1289,7 +1348,6 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
lower::StatementContext stmtCtx;
mlir::omp::ParallelClauseOps clauseOps;
llvm::SmallVector<const semantics::Symbol *> privateSyms;
llvm::SmallVector<mlir::Type> reductionTypes;
llvm::SmallVector<const semantics::Symbol *> reductionSyms;
genParallelClauses(converter, semaCtx, stmtCtx, item->clauses, loc,
Expand Down Expand Up @@ -1319,34 +1377,35 @@ genParallelOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
/*useDelayedPrivatization=*/true, &symTable);

if (privatize)
dsp.processStep1(&clauseOps, &privateSyms);
dsp.processStep1(&clauseOps);

auto genRegionEntryCB = [&](mlir::Operation *op) {
auto parallelOp = llvm::cast<mlir::omp::ParallelOp>(op);

llvm::SmallVector<mlir::Location> reductionLocs(
clauseOps.reductionVars.size(), loc);

mlir::OperandRange privateVars = parallelOp.getPrivateVars();
mlir::Region &region = parallelOp.getRegion();

llvm::SmallVector<mlir::Type> privateVarTypes = reductionTypes;
privateVarTypes.reserve(privateVarTypes.size() + privateVars.size());
llvm::transform(privateVars, std::back_inserter(privateVarTypes),
[](mlir::Value v) { return v.getType(); });
llvm::SmallVector<mlir::Type> allRegionArgTypes;
mergePrivateVarsInfo(parallelOp, llvm::ArrayRef(reductionTypes),
llvm::function_ref<mlir::Type(mlir::Value)>{
[](mlir::Value v) { return v.getType(); }},
allRegionArgTypes);

llvm::SmallVector<mlir::Location> privateVarLocs = reductionLocs;
privateVarLocs.reserve(privateVarLocs.size() + privateVars.size());
llvm::transform(privateVars, std::back_inserter(privateVarLocs),
[](mlir::Value v) { return v.getLoc(); });
llvm::SmallVector<mlir::Location> allRegionArgLocs;
mergePrivateVarsInfo(parallelOp, llvm::ArrayRef(reductionLocs),
llvm::function_ref<mlir::Location(mlir::Value)>{
[](mlir::Value v) { return v.getLoc(); }},
allRegionArgLocs);

firOpBuilder.createBlock(&region, /*insertPt=*/{}, privateVarTypes,
privateVarLocs);
mlir::Region &region = parallelOp.getRegion();
firOpBuilder.createBlock(&region, /*insertPt=*/{}, allRegionArgTypes,
allRegionArgLocs);

llvm::SmallVector<const semantics::Symbol *> allSymbols = reductionSyms;
allSymbols.append(privateSyms);
allSymbols.append(dsp.getAllSymbolsToPrivatize().begin(),
dsp.getAllSymbolsToPrivatize().end());

for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) {
fir::ExtendedValue hostExV = converter.getSymbolExtendedValue(*arg);
converter.bindSymbol(*arg, hlfir::translateToExtendedValue(
loc, firOpBuilder, hlfir::Entity{prv},
/*contiguousHint=*/
Expand Down Expand Up @@ -1541,11 +1600,22 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,
deviceAddrLocs, deviceAddrTypes, devicePtrSyms,
devicePtrLocs, devicePtrTypes);

llvm::SmallVector<const semantics::Symbol *> privateSyms;
DataSharingProcessor dsp(converter, semaCtx, item->clauses, eval,
/*shouldCollectPreDeterminedSymbols=*/
lower::omp::isLastItemInQueue(item, queue),
/*useDelayedPrivatization=*/true, &symTable);
dsp.processStep1(&clauseOps);

// 5.8.1 Implicit Data-Mapping Attribute Rules
// The following code follows the implicit data-mapping rules to map all the
// symbols used inside the region that have not been explicitly mapped using
// the map clause.
// symbols used inside the region that do not have explicit data-environment
// attribute clauses (neither data-sharing; e.g. `private`, nor `map`
// clauses).
auto captureImplicitMap = [&](const semantics::Symbol &sym) {
if (dsp.getAllSymbolsToPrivatize().contains(&sym))
return;

if (llvm::find(mapSyms, &sym) == mapSyms.end()) {
mlir::Value baseOp = converter.getSymbolAddress(sym);
if (!baseOp)
Expand Down Expand Up @@ -1632,7 +1702,7 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable,

auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps);
genBodyOfTargetOp(converter, symTable, semaCtx, eval, targetOp, mapSyms,
mapLocs, mapTypes, loc, queue, item);
mapLocs, mapTypes, dsp, loc, queue, item);
return targetOp;
}

Expand Down
6 changes: 6 additions & 0 deletions flang/lib/Lower/OpenMP/Utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ llvm::cl::opt<bool> enableDelayedPrivatization(
"Emit `[first]private` variables as clauses on the MLIR ops."),
llvm::cl::init(false));

// Boolean command-line switch `openmp-enable-delayed-privatization-staging`,
// off by default. Per its description, it enables emitting `[first]private`
// variables as clauses on the MLIR ops for constructs that are only partially
// supported.
llvm::cl::opt<bool> enableDelayedPrivatizationStaging(
"openmp-enable-delayed-privatization-staging",
llvm::cl::desc("For partially supported constructs, emit `[first]private` "
"variables as clauses on the MLIR ops."),
llvm::cl::init(false));

namespace Fortran {
namespace lower {
namespace omp {
Expand Down
1 change: 1 addition & 0 deletions flang/lib/Lower/OpenMP/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

extern llvm::cl::opt<bool> treatIndexAsSection;
extern llvm::cl::opt<bool> enableDelayedPrivatization;
extern llvm::cl::opt<bool> enableDelayedPrivatizationStaging;

namespace fir {
class FirOpBuilder;
Expand Down
19 changes: 9 additions & 10 deletions flang/lib/Optimizer/Builder/IntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6121,9 +6121,6 @@ IntrinsicLibrary::genSize(mlir::Type resultType,
// Note that the value of the KIND argument is already reflected in the
// resultType
assert(args.size() == 3);
if (const auto *boxValue = args[0].getBoxOf<fir::BoxValue>())
if (boxValue->hasAssumedRank())
TODO(loc, "intrinsic: size with assumed rank argument");

// Get the ARRAY argument
mlir::Value array = builder.createBox(loc, args[0]);
Expand All @@ -6137,13 +6134,15 @@ IntrinsicLibrary::genSize(mlir::Type resultType,

// Get the DIM argument.
mlir::Value dim = fir::getBase(args[1]);
if (std::optional<std::int64_t> cstDim = fir::getIntIfConstant(dim)) {
// If it is a compile time constant, skip the runtime call.
return builder.createConvert(loc, resultType,
fir::factory::readExtent(builder, loc,
fir::BoxValue{array},
cstDim.value() - 1));
}
if (!args[0].hasAssumedRank())
if (std::optional<std::int64_t> cstDim = fir::getIntIfConstant(dim)) {
// If both DIM and the rank are compile time constants, skip the runtime
// call.
return builder.createConvert(
loc, resultType,
fir::factory::readExtent(builder, loc, fir::BoxValue{array},
cstDim.value() - 1));
}
if (!fir::isa_ref_type(dim.getType()))
return builder.createConvert(
loc, resultType, fir::runtime::genSizeDim(builder, loc, array, dim));
Expand Down
12 changes: 0 additions & 12 deletions flang/lib/Optimizer/Transforms/VScaleAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
#include <algorithm>

namespace fir {
#define GEN_PASS_DECL_VSCALEATTR
#define GEN_PASS_DEF_VSCALEATTR
#include "flang/Optimizer/Transforms/Passes.h.inc"
} // namespace fir
Expand Down Expand Up @@ -77,14 +76,3 @@ void VScaleAttrPass::runOnOperation() {

LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n");
}

std::unique_ptr<mlir::Pass>
fir::createVScaleAttrPass(std::pair<unsigned, unsigned> vscaleAttr) {
VScaleAttrOptions opts;
opts.vscaleRange = vscaleAttr;
return std::make_unique<VScaleAttrPass>(opts);
}

std::unique_ptr<mlir::Pass> fir::createVScaleAttrPass() {
return std::make_unique<VScaleAttrPass>();
}
19 changes: 19 additions & 0 deletions flang/runtime/inquiry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,15 @@

namespace Fortran::runtime {

// Functor that views untyped storage as an array of INTEGER(KIND) elements
// and writes `value`, converted to that element type, into slot `at`.
template <int KIND> struct RawStoreIntegerAt {
  RT_API_ATTRS void operator()(
      void *contiguousIntegerArray, std::size_t at, std::int64_t value) const {
    using StoredInt = Fortran::runtime::CppTypeFor<
        Fortran::common::TypeCategory::Integer, KIND>;
    auto *elements{reinterpret_cast<StoredInt *>(contiguousIntegerArray)};
    elements[at] = value;
  }
};

extern "C" {
std::int64_t RTDEF(LboundDim)(
const Descriptor &array, int dim, const char *sourceFile, int line) {
Expand Down Expand Up @@ -76,5 +85,15 @@ std::int64_t RTDEF(SizeDim)(
return static_cast<std::int64_t>(dimension.Extent());
}

// Writes the extent of every dimension of `array` into `result`, one value
// per dimension in dimension order. `result` is treated as raw storage for
// integers of the requested `kind` (the store goes through
// RawStoreIntegerAt<kind>).
// NOTE(review): `result` presumably has to provide room for at least
// `array.rank()` integers of that kind -- nothing here checks it; confirm
// against the caller.
void RTDEF(Shape)(void *result, const Descriptor &array, int kind) {
Terminator terminator{__FILE__, __LINE__};
INTERNAL_CHECK(array.rank() <= common::maxRank);
for (SubscriptValue i{0}; i < array.rank(); ++i) {
const Dimension &dimension{array.GetDimension(i)};
// Dispatch on the runtime `kind` to the matching integer store.
Fortran::runtime::ApplyIntegerKind<RawStoreIntegerAt, void>(
kind, terminator, result, i, dimension.Extent());
}
}

} // extern "C"
} // namespace Fortran::runtime
108 changes: 108 additions & 0 deletions flang/test/Lower/HLFIR/assumed-rank-inquiries-2.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
! Test lowering of SIZE/SIZEOF inquiry intrinsics with assumed-rank
! arguments.
! RUN: bbc -emit-hlfir -o - %s -allow-assumed-rank | FileCheck %s


subroutine test_size_1(x)
real :: x(..)
call takes_integer(size(x))
end subroutine

subroutine test_size_2(x)
real :: x(..)
call takes_integer(size(x, 2))
end subroutine

subroutine test_size_3(x, d)
real :: x(..)
integer, optional :: d
call takes_integer(size(x, d))
end subroutine

subroutine test_size_4(x)
real, allocatable :: x(..)
call takes_integer(size(x))
end subroutine


! CHECK-LABEL: func.func @_QPtest_size_1(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<*:f32>> {fir.bindc_name = "x"}) {
! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {uniq_name = "_QFtest_size_1Ex"} : (!fir.box<!fir.array<*:f32>>, !fir.dscope) -> (!fir.box<!fir.array<*:f32>>, !fir.box<!fir.array<*:f32>>)
! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_2]]#1 : (!fir.box<!fir.array<*:f32>>) -> !fir.box<none>
! CHECK: %[[VAL_7:.*]] = fir.call @_FortranASize(%[[VAL_5]]
! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i64) -> i32
! CHECK: %[[VAL_9:.*]]:3 = hlfir.associate %[[VAL_8]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
! CHECK: fir.call @_QPtakes_integer(%[[VAL_9]]#1) fastmath<contract> : (!fir.ref<i32>) -> ()
! CHECK: hlfir.end_associate %[[VAL_9]]#1, %[[VAL_9]]#2 : !fir.ref<i32>, i1
! CHECK: return
! CHECK: }

! CHECK-LABEL: func.func @_QPtest_size_2(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<*:f32>> {fir.bindc_name = "x"}) {
! CHECK: %[[VAL_1:.*]] = fir.alloca i32
! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_2]] {uniq_name = "_QFtest_size_2Ex"} : (!fir.box<!fir.array<*:f32>>, !fir.dscope) -> (!fir.box<!fir.array<*:f32>>, !fir.box<!fir.array<*:f32>>)
! CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32
! CHECK: fir.store %[[VAL_4]] to %[[VAL_1]] : !fir.ref<i32>
! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_1]] : (!fir.ref<i32>) -> i64
! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i64
! CHECK: %[[VAL_7:.*]] = arith.cmpi eq, %[[VAL_5]], %[[VAL_6]] : i64
! CHECK: %[[VAL_8:.*]] = fir.if %[[VAL_7]] -> (i32) {
! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.box<!fir.array<*:f32>>) -> !fir.box<none>
! CHECK: %[[VAL_13:.*]] = fir.call @_FortranASize(%[[VAL_11]]
! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i64) -> i32
! CHECK: fir.result %[[VAL_14]] : i32
! CHECK: } else {
! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_1]] : !fir.ref<i32>
! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.box<!fir.array<*:f32>>) -> !fir.box<none>
! CHECK: %[[VAL_20:.*]] = fir.call @_FortranASizeDim(%[[VAL_18]]
! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i64) -> i32
! CHECK: fir.result %[[VAL_21]] : i32
! CHECK: }
! CHECK: %[[VAL_22:.*]]:3 = hlfir.associate %[[VAL_8]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
! CHECK: fir.call @_QPtakes_integer(%[[VAL_22]]#1) fastmath<contract> : (!fir.ref<i32>) -> ()
! CHECK: hlfir.end_associate %[[VAL_22]]#1, %[[VAL_22]]#2 : !fir.ref<i32>, i1
! CHECK: return
! CHECK: }

! CHECK-LABEL: func.func @_QPtest_size_3(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.box<!fir.array<*:f32>> {fir.bindc_name = "x"},
! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref<i32> {fir.bindc_name = "d", fir.optional}) {
! CHECK: %[[VAL_2:.*]] = fir.dummy_scope : !fir.dscope
! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_1]] dummy_scope %[[VAL_2]] {fortran_attrs = #fir.var_attrs<optional>, uniq_name = "_QFtest_size_3Ed"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_2]] {uniq_name = "_QFtest_size_3Ex"} : (!fir.box<!fir.array<*:f32>>, !fir.dscope) -> (!fir.box<!fir.array<*:f32>>, !fir.box<!fir.array<*:f32>>)
! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.ref<i32>) -> i64
! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i64
! CHECK: %[[VAL_7:.*]] = arith.cmpi eq, %[[VAL_5]], %[[VAL_6]] : i64
! CHECK: %[[VAL_8:.*]] = fir.if %[[VAL_7]] -> (i32) {
! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_4]]#1 : (!fir.box<!fir.array<*:f32>>) -> !fir.box<none>
! CHECK: %[[VAL_13:.*]] = fir.call @_FortranASize(%[[VAL_11]],
! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i64) -> i32
! CHECK: fir.result %[[VAL_14]] : i32
! CHECK: } else {
! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_3]]#1 : !fir.ref<i32>
! CHECK: %[[VAL_18:.*]] = fir.convert %[[VAL_4]]#1 : (!fir.box<!fir.array<*:f32>>) -> !fir.box<none>
! CHECK: %[[VAL_20:.*]] = fir.call @_FortranASizeDim(%[[VAL_18]]
! CHECK: %[[VAL_21:.*]] = fir.convert %[[VAL_20]] : (i64) -> i32
! CHECK: fir.result %[[VAL_21]] : i32
! CHECK: }
! CHECK: %[[VAL_22:.*]]:3 = hlfir.associate %[[VAL_8]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
! CHECK: fir.call @_QPtakes_integer(%[[VAL_22]]#1) fastmath<contract> : (!fir.ref<i32>) -> ()
! CHECK: hlfir.end_associate %[[VAL_22]]#1, %[[VAL_22]]#2 : !fir.ref<i32>, i1
! CHECK: return
! CHECK: }

! CHECK-LABEL: func.func @_QPtest_size_4(
! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref<!fir.box<!fir.heap<!fir.array<*:f32>>>> {fir.bindc_name = "x"}) {
! CHECK: %[[VAL_1:.*]] = fir.dummy_scope : !fir.dscope
! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_0]] dummy_scope %[[VAL_1]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFtest_size_4Ex"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<*:f32>>>>, !fir.dscope) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<*:f32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<*:f32>>>>)
! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_2]]#1 : !fir.ref<!fir.box<!fir.heap<!fir.array<*:f32>>>>
! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_3]] : (!fir.box<!fir.heap<!fir.array<*:f32>>>) -> !fir.box<none>
! CHECK: %[[VAL_8:.*]] = fir.call @_FortranASize(%[[VAL_6]]
! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]] : (i64) -> i32
! CHECK: %[[VAL_10:.*]]:3 = hlfir.associate %[[VAL_9]] {adapt.valuebyref} : (i32) -> (!fir.ref<i32>, !fir.ref<i32>, i1)
! CHECK: fir.call @_QPtakes_integer(%[[VAL_10]]#1) fastmath<contract> : (!fir.ref<i32>) -> ()
! CHECK: hlfir.end_associate %[[VAL_10]]#1, %[[VAL_10]]#2 : !fir.ref<i32>, i1
! CHECK: return
! CHECK: }
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
! Tests delayed privatization for `target ... private(..)` for allocatables.

! RUN: %flang_fc1 -emit-hlfir -fopenmp -mmlir --openmp-enable-delayed-privatization-staging \
! RUN: -o - %s 2>&1 | FileCheck %s
! RUN: bbc -emit-hlfir -fopenmp --openmp-enable-delayed-privatization-staging -o - %s 2>&1 \
! RUN: | FileCheck %s

! Lowering input for the FileCheck expectations in this file: a subroutine
! whose only local variable is a scalar integer allocatable that is listed in
! the private clause of an `omp target` construct.
subroutine target_allocatable
implicit none
! Declared allocatable; this subroutine contains no ALLOCATE statement for it.
integer, allocatable :: alloc_var

! The private(alloc_var) clause is what the expectations below exercise.
!$omp target private(alloc_var)
! Assignment to the allocatable inside the target region.
alloc_var = 10
!$omp end target
end subroutine target_allocatable

! CHECK-LABEL: omp.private {type = private}
! CHECK-SAME: @[[VAR_PRIVATIZER_SYM:.*]] :
! CHECK-SAME: [[TYPE:!fir.ref<!fir.box<!fir.heap<i32>>>]] alloc {
! CHECK: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):
! CHECK: %[[PRIV_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>> {bindc_name = "alloc_var", {{.*}}}

! CHECK-NEXT: %[[PRIV_ARG_VAL:.*]] = fir.load %[[PRIV_ARG]] : !fir.ref<!fir.box<!fir.heap<i32>>>
! CHECK-NEXT: %[[PRIV_ARG_BOX:.*]] = fir.box_addr %[[PRIV_ARG_VAL]] : (!fir.box<!fir.heap<i32>>) -> !fir.heap<i32>
! CHECK-NEXT: %[[PRIV_ARG_ADDR:.*]] = fir.convert %[[PRIV_ARG_BOX]] : (!fir.heap<i32>) -> i64
! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
! CHECK-NEXT: %[[ALLOC_COND:.*]] = arith.cmpi ne, %[[PRIV_ARG_ADDR]], %[[C0]] : i64

! CHECK-NEXT: fir.if %[[ALLOC_COND]] {
! CHECK: %[[PRIV_ALLOCMEM:.*]] = fir.allocmem i32 {fir.must_be_heap = true, {{.*}}}
! CHECK-NEXT: %[[PRIV_ALLOCMEM_BOX:.*]] = fir.embox %[[PRIV_ALLOCMEM]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
! CHECK-NEXT: fir.store %[[PRIV_ALLOCMEM_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
! CHECK-NEXT: } else {
! CHECK-NEXT: %[[ZERO_BITS:.*]] = fir.zero_bits !fir.heap<i32>
! CHECK-NEXT: %[[ZERO_BOX:.*]] = fir.embox %[[ZERO_BITS]] : (!fir.heap<i32>) -> !fir.box<!fir.heap<i32>>
! CHECK-NEXT: fir.store %[[ZERO_BOX]] to %[[PRIV_ALLOC]] : !fir.ref<!fir.box<!fir.heap<i32>>>
! CHECK-NEXT: }

! CHECK-NEXT: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[PRIV_ALLOC]]
! CHECK-NEXT: omp.yield(%[[PRIV_DECL]]#0 : [[TYPE]])

! CHECK-NEXT: } dealloc {
! CHECK-NEXT: ^bb0(%[[PRIV_ARG:.*]]: [[TYPE]]):

! CHECK-NEXT: %[[PRIV_VAL:.*]] = fir.load %[[PRIV_ARG]]
! CHECK-NEXT: %[[PRIV_ADDR:.*]] = fir.box_addr %[[PRIV_VAL]]
! CHECK-NEXT: %[[PRIV_ADDR_I64:.*]] = fir.convert %[[PRIV_ADDR]]
! CHECK-NEXT: %[[C0:.*]] = arith.constant 0 : i64
! CHECK-NEXT: %[[PRIV_NULL_COND:.*]] = arith.cmpi ne, %[[PRIV_ADDR_I64]], %[[C0]] : i64

! CHECK-NEXT: fir.if %[[PRIV_NULL_COND]] {
! CHECK: %[[PRIV_VAL_2:.*]] = fir.load %[[PRIV_ARG]]
! CHECK-NEXT: %[[PRIV_ADDR_2:.*]] = fir.box_addr %[[PRIV_VAL_2]]
! CHECK-NEXT: fir.freemem %[[PRIV_ADDR_2]]
! CHECK-NEXT: %[[ZEROS:.*]] = fir.zero_bits
! CHECK-NEXT: %[[ZEROS_BOX:.*]] = fir.embox %[[ZEROS]]
! CHECK-NEXT: fir.store %[[ZEROS_BOX]] to %[[PRIV_ARG]]
! CHECK-NEXT: }

! CHECK-NEXT: omp.yield
! CHECK-NEXT: }


! CHECK-LABEL: func.func @_QPtarget_allocatable() {

! CHECK: %[[VAR_ALLOC:.*]] = fir.alloca !fir.box<!fir.heap<i32>>
! CHECK-SAME: {bindc_name = "alloc_var", {{.*}}}
! CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_ALLOC]]

! CHECK: omp.target private(
! CHECK-SAME: @[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %{{.*}} : [[TYPE]]) {
Loading