From 3d3e34b9d002ea7ee919cd420eadf4df27abbb91 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Thu, 30 Oct 2025 12:49:37 -0700 Subject: [PATCH] [clang][lit] Add SPIR-V to some OpenMP offload tests Signed-off-by: Nick Sarnie --- clang/test/OpenMP/metadirective_ast_print.c | 36 ++++++++++++---- .../metadirective_device_arch_codegen.cpp | 28 ++++++++----- clang/test/OpenMP/thread_limit_amdgpu.c | 34 --------------- clang/test/OpenMP/thread_limit_gpu.c | 41 +++++++++++++++++++ 4 files changed, 86 insertions(+), 53 deletions(-) delete mode 100644 clang/test/OpenMP/thread_limit_amdgpu.c create mode 100644 clang/test/OpenMP/thread_limit_gpu.c diff --git a/clang/test/OpenMP/metadirective_ast_print.c b/clang/test/OpenMP/metadirective_ast_print.c index 638dbae1bc774..75ef5fa26827c 100644 --- a/clang/test/OpenMP/metadirective_ast_print.c +++ b/clang/test/OpenMP/metadirective_ast_print.c @@ -2,17 +2,25 @@ // RUN: %clang_cc1 -verify -fopenmp-simd -triple x86_64-unknown-linux-gnu -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=DEFAULT -// RUN: %clang_cc1 -verify -fopenmp -triple amdgcn-amd-amdhsa -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=DEFAULT-AMDGCN +// RUN: %clang_cc1 -verify -fopenmp -triple amdgcn-amd-amdhsa -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=DEFAULT-GPU -// RUN: %clang_cc1 -verify -fopenmp-simd -triple amdgcn-amd-amdhsa -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=DEFAULT-AMDGCN +// RUN: %clang_cc1 -verify -fopenmp-simd -triple amdgcn-amd-amdhsa -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=DEFAULT-GPU + +// RUN: %clang_cc1 -verify -fopenmp -triple spirv64-intel -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=DEFAULT-GPU + +// RUN: %clang_cc1 -verify -fopenmp-simd -triple spirv64-intel -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=DEFAULT-GPU // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=52 -DOMP52 -triple x86_64-unknown-linux-gnu -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=OMP52 // RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=52 -DOMP52 -triple x86_64-unknown-linux-gnu -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=OMP52 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=52 -DOMP52 -triple amdgcn-amd-amdhsa -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=OMP52-AMDGCN +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=52 -DOMP52 -triple amdgcn-amd-amdhsa -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=OMP52-GPU + +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=52 -DOMP52 -triple amdgcn-amd-amdhsa -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=OMP52-GPU + +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=52 -DOMP52 -triple spirv64-intel -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=OMP52-GPU -// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=52 -DOMP52 -triple amdgcn-amd-amdhsa -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=OMP52-AMDGCN +// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=52 -DOMP52 -triple spirv64-intel -x c -std=c99 -ast-print %s -o - | FileCheck %s --check-prefix=OMP52-GPU // expected-no-diagnostics #ifndef HEADER @@ -77,6 +85,12 @@ void foo1(void) { for (int i = 0; i < 100; i++) ; +#pragma omp metadirective when(device={arch("spirv64")}: \ + teams distribute parallel for)\ + otherwise(parallel for) + for (int i = 0; i < 100; i++) + ; + #pragma omp metadirective when(implementation = {extension(match_all)} \ : nothing) otherwise(parallel for) for (int i = 0; i < 16; i++) @@ -134,8 +148,8 @@ void foo1(void) { // OMP52-NEXT: for (int i = 0; i < 16; i++) { // OMP52-NEXT: #pragma omp simd // OMP52-NEXT: for (int j = 0; j < 16; j++) -// OMP52-AMDGCN: #pragma omp teams distribute parallel for -// OMP52-AMDGCN-NEXT: for (int i = 0; i < 100; i++) +// OMP52-GPU: #pragma omp teams distribute parallel for +// OMP52-GPU-NEXT: for (int i = 0; i < 100; i++) // OMP52: for (int i = 0; i < 16; i++) // OMP52: for (int i = 0; i < 16; i++) @@ -198,6 +212,12 @@ void foo2(void) { for (int i = 0; i < 100; i++) ; +#pragma omp metadirective when(device={arch("spirv64")}: \ + teams distribute parallel for)\ + default(parallel for) + for (int i = 0; i < 100; i++) + ; + #pragma omp metadirective when(implementation = {extension(match_all)} \ : nothing) default(parallel for) for (int i = 0; i < 16; i++) @@ -266,8 +286,8 @@ void foo2(void) { // DEFAULT-NEXT: for (int i = 0; i < 16; i++) { // DEFAULT-NEXT: #pragma omp simd // DEFAULT-NEXT: for (int j = 0; j < 16; j++) -// DEFAULT-AMDGCN: #pragma omp teams distribute parallel for -// DEFAULT-AMDGCN-NEXT: for (int i = 0; i < 100; i++) +// DEFAULT-GPU: #pragma omp teams distribute parallel for +// DEFAULT-GPU-NEXT: for (int i = 0; i < 100; i++) // DEFAULT: for (int i = 0; i < 16; i++) // DEFAULT: for (int i = 0; i < 16; i++) diff --git a/clang/test/OpenMP/metadirective_device_arch_codegen.cpp b/clang/test/OpenMP/metadirective_device_arch_codegen.cpp index eecae310d0a77..1d5584de67162 100644 --- a/clang/test/OpenMP/metadirective_device_arch_codegen.cpp +++ b/clang/test/OpenMP/metadirective_device_arch_codegen.cpp @@ -1,7 +1,7 @@ -// REQUIRES: amdgpu-registered-target - // RUN: %clang_cc1 -fopenmp -x c++ -w -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -fopenmp -x c++ -w -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -target-cpu gfx906 -o - | FileCheck %s +// RUN: %clang_cc1 -fopenmp -x c++ -w -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o %t-ppc-spirv-host.bc +// RUN: %clang_cc1 -fopenmp -x c++ -w -std=c++11 -triple spirv64-intel -fopenmp-targets=spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-spirv-host.bc -o - | FileCheck %s // expected-no-diagnostics @@ -16,6 +16,12 @@ Inspired from SOLLVE tests: #define N 1024 +#ifdef __AMDGPU__ +#define GPU "amdgcn" +#else +#define GPU "spirv64" +#endif + int metadirective1() { int v1[N], v2[N], v3[N]; @@ -26,7 +32,7 @@ int metadirective1() { #pragma omp target map(to:v1,v2) map(from:v3, target_device_num) device(default_device) { #pragma omp metadirective \ - when(device={arch("amdgcn")}: teams distribute parallel for) \ + when(device={arch(GPU)}: teams distribute parallel for) \ default(parallel for) for (int i = 0; i < N; i++) { @@ -38,28 +44,28 @@ int metadirective1() { return errors; } -// CHECK: define weak_odr protected amdgpu_kernel void @[[METADIRECTIVE:.+metadirective1[a-z0-9_]+]] +// CHECK: define weak_odr protected {{amdgpu|spir}}_kernel void @[[METADIRECTIVE:.+metadirective1[a-z0-9_]+]] // CHECK: entry: -// CHECK: %{{[0-9]}} = call i32 @__kmpc_target_init +// CHECK: %{{[0-9]}} = call{{.*}} i32 @__kmpc_target_init // CHECK: user_code.entry: -// CHECK: call void @[[METADIRECTIVE]]_omp_outlined -// CHECK-NOT: call void @__kmpc_parallel_51 +// CHECK: call{{.*}} void @[[METADIRECTIVE]]_omp_outlined +// CHECK-NOT: call{{.*}} void @__kmpc_parallel_51 // CHECK: ret void // CHECK: define internal void @[[METADIRECTIVE]]_omp_outlined // CHECK: entry: -// CHECK: call void @__kmpc_distribute_static_init +// CHECK: call{{.*}} void @__kmpc_distribute_static_init // CHECK: omp.loop.exit: -// CHECK: call void @__kmpc_distribute_static_fini +// CHECK: call{{.*}} void @__kmpc_distribute_static_fini // CHECK: define internal void @[[METADIRECTIVE]]_omp_outlined_omp_outlined // CHECK: entry: -// CHECK: call void @__kmpc_for_static_init_4 +// CHECK: call{{.*}} void @__kmpc_for_static_init_4 // CHECK: omp.inner.for.body: // CHECK: store atomic {{.*}} monotonic // CHECK: omp.loop.exit: -// CHECK-NEXT: call void @__kmpc_for_static_fini +// CHECK-NEXT: call{{.*}} void @__kmpc_for_static_fini // CHECK-NEXT: ret void diff --git a/clang/test/OpenMP/thread_limit_amdgpu.c b/clang/test/OpenMP/thread_limit_amdgpu.c deleted file mode 100644 index f884eeb73c3ff..0000000000000 --- a/clang/test/OpenMP/thread_limit_amdgpu.c +++ /dev/null @@ -1,34 +0,0 @@ -// Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s -// expected-no-diagnostics - -#ifndef HEADER -#define HEADER - -void foo(int N) { -#pragma omp target teams distribute parallel for simd - for (int i = 0; i < N; ++i) - ; -#pragma omp target teams distribute parallel for simd thread_limit(4) - for (int i = 0; i < N; ++i) - ; -#pragma omp target teams distribute parallel for simd ompx_attribute(__attribute__((launch_bounds(42, 42)))) - for (int i = 0; i < N; ++i) - ; -#pragma omp target teams distribute parallel for simd ompx_attribute(__attribute__((launch_bounds(42, 42)))) num_threads(22) - for (int i = 0; i < N; ++i) - ; -} - -#endif - -// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l10({{.*}}) #[[ATTR1:.+]] { -// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l13({{.*}}) #[[ATTR2:.+]] { -// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l16({{.*}}) #[[ATTR3:.+]] { -// CHECK: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l19({{.*}}) #[[ATTR4:.+]] { - -// CHECK: attributes #[[ATTR1]] = { {{.*}} "amdgpu-flat-work-group-size"="1,256" {{.*}} } -// CHECK: attributes #[[ATTR2]] = { {{.*}} "amdgpu-flat-work-group-size"="1,4" {{.*}} } -// CHECK: attributes #[[ATTR3]] = { {{.*}} "amdgpu-flat-work-group-size"="1,42" "amdgpu-max-num-workgroups"="42,1,1"{{.*}} } -// CHECK: attributes #[[ATTR4]] = { {{.*}} "amdgpu-flat-work-group-size"="1,22" "amdgpu-max-num-workgroups"="42,1,1"{{.*}} } diff --git a/clang/test/OpenMP/thread_limit_gpu.c b/clang/test/OpenMP/thread_limit_gpu.c new file mode 100644 index 0000000000000..4bcc14d070c22 --- /dev/null +++ b/clang/test/OpenMP/thread_limit_gpu.c @@ -0,0 +1,41 @@ +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck -check-prefixes=CHECK,CHECK-AMDGPU %s +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux-gnu -fopenmp-targets=spirv64-intel -emit-llvm-bc %s -o %t-x86-spirv-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple spirv64-intel -fopenmp-targets=spirv64-intel -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-spirv-host.bc -o - | FileCheck -check-prefixes=CHECK,CHECK-SPIRV %s +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +void foo(int N) { +#pragma omp target teams distribute parallel for simd + for (int i = 0; i < N; ++i) + ; +#pragma omp target teams distribute parallel for simd thread_limit(4) + for (int i = 0; i < N; ++i) + ; +#pragma omp target teams distribute parallel for simd ompx_attribute(__attribute__((launch_bounds(42, 42)))) + for (int i = 0; i < N; ++i) + ; +#pragma omp target teams distribute parallel for simd ompx_attribute(__attribute__((launch_bounds(42, 42)))) num_threads(22) + for (int i = 0; i < N; ++i) + ; +} + +#endif + +// CHECK: define weak_odr protected {{amdgpu|spir}}_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l12({{.*}}) #[[ATTR1:.+]] { +// CHECK: define weak_odr protected {{amdgpu|spir}}_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l15({{.*}}) #[[ATTR2:.+]] { +// CHECK: define weak_odr protected {{amdgpu|spir}}_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l18({{.*}}) #[[ATTR3:.+]] { +// CHECK: define weak_odr protected {{amdgpu|spir}}_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+__Z3fooi_}}l21({{.*}}) #[[ATTR4:.+]] { + +// CHECK-AMDGPU: attributes #[[ATTR1]] = { {{.*}} "amdgpu-flat-work-group-size"="1,256" {{.*}} } +// CHECK-AMDGPU: attributes #[[ATTR2]] = { {{.*}} "amdgpu-flat-work-group-size"="1,4" {{.*}} } +// CHECK-AMDGPU: attributes #[[ATTR3]] = { {{.*}} "amdgpu-flat-work-group-size"="1,42" "amdgpu-max-num-workgroups"="42,1,1"{{.*}} } +// CHECK-AMDGPU: attributes #[[ATTR4]] = { {{.*}} "amdgpu-flat-work-group-size"="1,22" "amdgpu-max-num-workgroups"="42,1,1"{{.*}} } + +// CHECK-SPIRV: attributes #[[ATTR1]] = { {{.*}} "omp_target_thread_limit"="256" {{.*}} } +// CHECK-SPIRV: attributes #[[ATTR2]] = { {{.*}} "omp_target_thread_limit"="4" {{.*}} } +// CHECK-SPIRV: attributes #[[ATTR3]] = { {{.*}} "omp_target_num_teams"="42" "omp_target_thread_limit"="42" {{.*}} } +// CHECK-SPIRV: attributes #[[ATTR4]] = { {{.*}} "omp_target_num_teams"="42" "omp_target_thread_limit"="22" {{.*}} }