Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[OpenMP] Add option to make offloading mandatory
Currently, when we generate OpenMP offloading code we always make fallback code for the CPU. This is necessary for implementing features like conditional offloading and ensuring that unhandled pragmas don't result in missing symbols. However, this is problematic for a few cases. For offloading tests we can silently fall back to the host without realizing that offloading failed. Additionally, this makes it impossible to provide interoperability with other offloading schemes like HIP or CUDA because those methods do not provide any such host fallback guarantee. This patch adds the `-fopenmp-offload-mandatory` flag to prevent generating the fallback symbol on the CPU and instead replaces the function with a dummy global and the failed branch with 'unreachable'. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D120353
- Loading branch information
Showing
7 changed files
with
151 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" | ||
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp-offload-mandatory -emit-llvm %s -o - | FileCheck %s --check-prefix=MANDATORY | ||
// expected-no-diagnostics | ||
|
||
void foo() {} /* Marked 'declare target' on the next line; the host-side checks in this file still contain a definition of _Z3foov. */ | ||
#pragma omp declare target(foo) | ||
|
||
void bar() {} /* Declared device_type(nohost) on the next line; called from every target region below. The checks visible in this file contain no host definition for it. */ | ||
#pragma omp declare target device_type(nohost) to(bar) | ||
|
||
void host() { /* Unconditional target region: the checks show the offload-failed branch ending in 'unreachable' rather than a host fallback call. NOTE: the checks encode this region's source line number, so do not add or remove lines above it. */ | ||
#pragma omp target | ||
{ bar(); } | ||
} | ||
|
||
void host_if(bool cond) { /* Target region guarded by if(cond): the checks show both the offload-failed branch and the if-else (cond == false) branch ending in 'unreachable'. */ | ||
#pragma omp target if(cond) | ||
{ bar(); } | ||
} | ||
|
||
void host_dev(int device) { /* Target region with an explicit device(device) clause: the checks show the sign-extended device id passed to __tgt_target_mapper (the other regions pass -1), and the offload-failed branch ending in 'unreachable'. */ | ||
#pragma omp target device(device) | ||
{ bar(); } | ||
} | ||
// MANDATORY-LABEL: define {{[^@]+}}@_Z3foov | ||
// MANDATORY-SAME: () #[[ATTR0:[0-9]+]] { | ||
// MANDATORY-NEXT: entry: | ||
// MANDATORY-NEXT: ret void | ||
// | ||
// | ||
// MANDATORY-LABEL: define {{[^@]+}}@_Z4hostv | ||
// MANDATORY-SAME: () #[[ATTR0]] { | ||
// MANDATORY-NEXT: entry: | ||
// MANDATORY-NEXT: [[TMP0:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z4hostv_l12.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null) | ||
// MANDATORY-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0 | ||
// MANDATORY-NEXT: br i1 [[TMP1]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] | ||
// MANDATORY: omp_offload.failed: | ||
// MANDATORY-NEXT: unreachable | ||
// MANDATORY: omp_offload.cont: | ||
// MANDATORY-NEXT: ret void | ||
// | ||
// | ||
// MANDATORY-LABEL: define {{[^@]+}}@_Z7host_ifb | ||
// MANDATORY-SAME: (i1 noundef zeroext [[COND:%.*]]) #[[ATTR0]] { | ||
// MANDATORY-NEXT: entry: | ||
// MANDATORY-NEXT: [[COND_ADDR:%.*]] = alloca i8, align 1 | ||
// MANDATORY-NEXT: [[FROMBOOL:%.*]] = zext i1 [[COND]] to i8 | ||
// MANDATORY-NEXT: store i8 [[FROMBOOL]], i8* [[COND_ADDR]], align 1 | ||
// MANDATORY-NEXT: [[TMP0:%.*]] = load i8, i8* [[COND_ADDR]], align 1 | ||
// MANDATORY-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP0]] to i1 | ||
// MANDATORY-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] | ||
// MANDATORY: omp_if.then: | ||
// MANDATORY-NEXT: [[TMP1:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7host_ifb_l17.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null) | ||
// MANDATORY-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 | ||
// MANDATORY-NEXT: br i1 [[TMP2]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] | ||
// MANDATORY: omp_offload.failed: | ||
// MANDATORY-NEXT: unreachable | ||
// MANDATORY: omp_offload.cont: | ||
// MANDATORY-NEXT: br label [[OMP_IF_END:%.*]] | ||
// MANDATORY: omp_if.else: | ||
// MANDATORY-NEXT: unreachable | ||
// MANDATORY: omp_if.end: | ||
// MANDATORY-NEXT: ret void | ||
// | ||
// | ||
// MANDATORY-LABEL: define {{[^@]+}}@_Z8host_devi | ||
// MANDATORY-SAME: (i32 noundef signext [[DEVICE:%.*]]) #[[ATTR0]] { | ||
// MANDATORY-NEXT: entry: | ||
// MANDATORY-NEXT: [[DEVICE_ADDR:%.*]] = alloca i32, align 4 | ||
// MANDATORY-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 | ||
// MANDATORY-NEXT: store i32 [[DEVICE]], i32* [[DEVICE_ADDR]], align 4 | ||
// MANDATORY-NEXT: [[TMP0:%.*]] = load i32, i32* [[DEVICE_ADDR]], align 4 | ||
// MANDATORY-NEXT: store i32 [[TMP0]], i32* [[DOTCAPTURE_EXPR_]], align 4 | ||
// MANDATORY-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 | ||
// MANDATORY-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 | ||
// MANDATORY-NEXT: [[TMP3:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 [[TMP2]], i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8host_devi_l22.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null) | ||
// MANDATORY-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 | ||
// MANDATORY-NEXT: br i1 [[TMP4]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] | ||
// MANDATORY: omp_offload.failed: | ||
// MANDATORY-NEXT: unreachable | ||
// MANDATORY: omp_offload.cont: | ||
// MANDATORY-NEXT: ret void | ||
// | ||
// | ||
// MANDATORY-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg | ||
// MANDATORY-SAME: () #[[ATTR3:[0-9]+]] { | ||
// MANDATORY-NEXT: entry: | ||
// MANDATORY-NEXT: call void @__tgt_register_requires(i64 1) | ||
// MANDATORY-NEXT: ret void | ||
// |