-
Notifications
You must be signed in to change notification settings - Fork 10.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[OpenMP][USM] Introduces -fopenmp-force-usm flag (#76571)
This flag forces the compiler to generate code for OpenMP target regions as if the user specified the #pragma omp requires unified_shared_memory in each source file. The option does not have a -fno-* friend since OpenMP requires the unified_shared_memory clause to be present in all source files. Since this flag does no harm if the clause is present, it can be used in conjunction. My understanding is that USM should not be turned off selectively, hence, no -fno- version. This adds a basic test to check the correct generation of double indirect access to declare target globals in USM mode vs non-USM mode. Which I think is the only difference observable in code generation. This runtime test checks for the (non-)occurence of data movement between host and device. It does one run without the flag and one with the flag to also see that both versions behave as expected. In the case w/o the new flag data movement between host and device is expected. In the case with the flag such data movement should not be present / reported.
- Loading branch information
Showing
7 changed files
with
160 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ --version 3 | ||
// REQUIRES: amdgpu-registered-target | ||
|
||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp-force-usm -emit-llvm-bc %s -o %t-ppc-host.bc | ||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-force-usm -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix=CHECK-USM %s | ||
|
||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc | ||
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck -check-prefix=CHECK-DEFAULT %s | ||
// expected-no-diagnostics | ||
|
||
extern "C" void *malloc(unsigned int b); | ||
|
||
int GI; | ||
#pragma omp declare target | ||
int *pGI; | ||
#pragma omp end declare target | ||
|
||
int main(void) { | ||
|
||
GI = 0; | ||
|
||
pGI = (int *) malloc(sizeof(int)); | ||
*pGI = 42; | ||
|
||
#pragma omp target map(pGI[:1], GI) | ||
{ | ||
GI = 1; | ||
*pGI = 2; | ||
} | ||
|
||
return 0; | ||
} | ||
|
||
// CHECK-USM-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25( | ||
// CHECK-USM-SAME: ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GI:%.*]]) #[[ATTR0:[0-9]+]] { | ||
// CHECK-USM-NEXT: entry: | ||
// CHECK-USM-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) | ||
// CHECK-USM-NEXT: [[GI_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) | ||
// CHECK-USM-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr | ||
// CHECK-USM-NEXT: [[GI_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GI_ADDR]] to ptr | ||
// CHECK-USM-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 | ||
// CHECK-USM-NEXT: store ptr [[GI]], ptr [[GI_ADDR_ASCAST]], align 8 | ||
// CHECK-USM-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GI_ADDR_ASCAST]], align 8 | ||
// CHECK-USM-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_kernel_environment to ptr), ptr [[DYN_PTR]]) | ||
// CHECK-USM-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 | ||
// CHECK-USM-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] | ||
// CHECK-USM: user_code.entry: | ||
// CHECK-USM-NEXT: store i32 1, ptr [[TMP0]], align 4 | ||
// CHECK-USM-NEXT: [[TMP2:%.*]] = load ptr, ptr @pGI_decl_tgt_ref_ptr, align 8 | ||
// CHECK-USM-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 | ||
// CHECK-USM-NEXT: store i32 2, ptr [[TMP3]], align 4 | ||
// CHECK-USM-NEXT: call void @__kmpc_target_deinit() | ||
// CHECK-USM-NEXT: ret void | ||
// CHECK-USM: worker.exit: | ||
// CHECK-USM-NEXT: ret void | ||
// | ||
// | ||
// CHECK-DEFAULT-LABEL: define weak_odr protected amdgpu_kernel void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25( | ||
// CHECK-DEFAULT-SAME: ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GI:%.*]]) #[[ATTR0:[0-9]+]] { | ||
// CHECK-DEFAULT-NEXT: entry: | ||
// CHECK-DEFAULT-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) | ||
// CHECK-DEFAULT-NEXT: [[GI_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) | ||
// CHECK-DEFAULT-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr | ||
// CHECK-DEFAULT-NEXT: [[GI_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[GI_ADDR]] to ptr | ||
// CHECK-DEFAULT-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8 | ||
// CHECK-DEFAULT-NEXT: store ptr [[GI]], ptr [[GI_ADDR_ASCAST]], align 8 | ||
// CHECK-DEFAULT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GI_ADDR_ASCAST]], align 8 | ||
// CHECK-DEFAULT-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_kernel_environment to ptr), ptr [[DYN_PTR]]) | ||
// CHECK-DEFAULT-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 | ||
// CHECK-DEFAULT-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] | ||
// CHECK-DEFAULT: user_code.entry: | ||
// CHECK-DEFAULT-NEXT: store i32 1, ptr [[TMP0]], align 4 | ||
// CHECK-DEFAULT-NEXT: [[TMP2:%.*]] = load ptr, ptr addrspacecast (ptr addrspace(1) @pGI to ptr), align 8 | ||
// CHECK-DEFAULT-NEXT: store i32 2, ptr [[TMP2]], align 4 | ||
// CHECK-DEFAULT-NEXT: call void @__kmpc_target_deinit() | ||
// CHECK-DEFAULT-NEXT: ret void | ||
// CHECK-DEFAULT: worker.exit: | ||
// CHECK-DEFAULT-NEXT: ret void | ||
// |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
// clang-format off | ||
// RUN: %libomptarget-compilexx-generic | ||
// RUN: env LIBOMPTARGET_INFO=32 %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefix=NO-USM | ||
// | ||
// RUN: %libomptarget-compilexxx-generic-force-usm | ||
// RUN: env HSA_XNACK=1 LIBOMPTARGET_INFO=32 \ | ||
// RUN: %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefix=FORCE-USM | ||
// | ||
// UNSUPPORTED: nvptx64-nvidia-cuda | ||
// UNSUPPORTED: nvptx64-nvidia-cuda-LTO | ||
// clang-format on | ||
|
||
#include <cassert> | ||
#include <cstdio> | ||
#include <cstdlib> | ||
|
||
int GI; | ||
#pragma omp declare target | ||
int *pGI; | ||
#pragma omp end declare target | ||
|
||
int main(void) { | ||
|
||
GI = 0; | ||
// Implicit mappings | ||
int alpha = 1; | ||
int beta[3] = {2, 5, 8}; | ||
|
||
// Require map clauses for non-USM execution | ||
pGI = (int *)malloc(sizeof(int)); | ||
*pGI = 42; | ||
|
||
#pragma omp target map(pGI[ : 1], GI) | ||
{ | ||
GI = 1 * alpha; | ||
*pGI = 2 * beta[1]; | ||
} | ||
|
||
assert(GI == 1); | ||
assert(*pGI == 10); | ||
|
||
printf("SUCCESS\n"); | ||
|
||
return 0; | ||
} | ||
|
||
// clang-format off | ||
// NO-USM: omptarget device 0 info: Copying data from host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=4 | ||
// NO-USM-NEXT: omptarget device 0 info: Copying data from host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=12 | ||
// NO-USM-NEXT: omptarget device 0 info: Copying data from host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=4 | ||
// NO-USM-NEXT: omptarget device 0 info: Copying data from host to device, HstPtr={{.*}}, TgtPtr={{.*}}, Size=8, Name=pGI | ||
// NO-USM-NEXT: omptarget device 0 info: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, Size=4 | ||
// NO-USM-NEXT: omptarget device 0 info: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, Size=12 | ||
// NO-USM-NEXT: omptarget device 0 info: Copying data from device to host, TgtPtr={{.*}}, HstPtr={{.*}}, Size=4 | ||
// NO-USM-NEXT: SUCCESS | ||
|
||
// FORCE-USM: SUCCESS | ||
// | ||
// clang-format on |