Skip to content

Commit

Permalink
[Flang][OpenMP][MLIR] Add support for -nogpulib option (#71045)
Browse files Browse the repository at this point in the history
If -nogpulib option is passed by the user, then the OpenMP device
runtime is not used and we should not emit globals to configure
debugging at compile-time for the device runtime.

Link to -nogpulib flag implementation for Clang:
https://reviews.llvm.org/D125314
  • Loading branch information
DominikAdamski authored Jan 10, 2024
1 parent 8f78dd4 commit f443fbc
Show file tree
Hide file tree
Showing 14 changed files with 62 additions and 10 deletions.
2 changes: 1 addition & 1 deletion clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -5197,7 +5197,7 @@ def nohipwrapperinc : Flag<["-"], "nohipwrapperinc">, Group<IncludePath_Group>,
HelpText<"Do not include the default HIP wrapper headers and include paths">;
def : Flag<["-"], "nocudainc">, Alias<nogpuinc>;
def nogpulib : Flag<["-"], "nogpulib">, MarshallingInfoFlag<LangOpts<"NoGPULib">>,
Visibility<[ClangOption, CC1Option]>,
Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
HelpText<"Do not link device library for CUDA/HIP device compilation">;
def : Flag<["-"], "nocudalib">, Alias<nogpulib>;
def gpulibc : Flag<["-"], "gpulibc">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>,
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Driver/ToolChains/Flang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,8 @@ void Flang::addOffloadOptions(Compilation &C, const InputInfoList &Inputs,
CmdArgs.push_back("-fopenmp-assume-no-thread-state");
if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism))
CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism");
if (Args.hasArg(options::OPT_nogpulib))
CmdArgs.push_back("-nogpulib");
}
}

Expand Down
2 changes: 2 additions & 0 deletions flang/include/flang/Frontend/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ LANGOPT(Name, Bits, Default)

ENUM_LANGOPT(FPContractMode, FPModeKind, 2, FPM_Fast) ///< FP Contract Mode (off/fast)

/// Indicate a build without the standard GPU libraries.
LANGOPT(NoGPULib , 1, false)
/// Permit floating point optimization without regard to infinities
LANGOPT(NoHonorInfs, 1, false)
/// Permit floating point optimization without regard to NaN
Expand Down
11 changes: 7 additions & 4 deletions flang/include/flang/Tools/CrossToolHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,16 @@ struct OffloadModuleOpts {
OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription,
bool OpenMPThreadSubscription, bool OpenMPNoThreadState,
bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice,
bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {})
bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {},
bool NoGPULib = false)
: OpenMPTargetDebug(OpenMPTargetDebug),
OpenMPTeamSubscription(OpenMPTeamSubscription),
OpenMPThreadSubscription(OpenMPThreadSubscription),
OpenMPNoThreadState(OpenMPNoThreadState),
OpenMPNoNestedParallelism(OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU),
OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile) {}
OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile),
NoGPULib(NoGPULib) {}

OffloadModuleOpts(Fortran::frontend::LangOptions &Opts)
: OpenMPTargetDebug(Opts.OpenMPTargetDebug),
Expand All @@ -73,7 +75,7 @@ struct OffloadModuleOpts {
OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism),
OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice),
OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion),
OMPHostIRFile(Opts.OMPHostIRFile) {}
OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(Opts.NoGPULib) {}

uint32_t OpenMPTargetDebug = 0;
bool OpenMPTeamSubscription = false;
Expand All @@ -84,6 +86,7 @@ struct OffloadModuleOpts {
bool OpenMPIsGPU = false;
uint32_t OpenMPVersion = 11;
std::string OMPHostIRFile = {};
bool NoGPULib = false;
};

// Shares assinging of the OpenMP OffloadModuleInterface and its assorted
Expand All @@ -98,7 +101,7 @@ void setOffloadModuleInterfaceAttributes(
if (Opts.OpenMPIsTargetDevice) {
offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription,
Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState,
Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion);
Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion, Opts.NoGPULib);

if (!Opts.OMPHostIRFile.empty())
offloadMod.setHostIRFilePath(Opts.OMPHostIRFile);
Expand Down
2 changes: 2 additions & 0 deletions flang/lib/Frontend/CompilerInvocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -935,6 +935,8 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args,
args.hasArg(clang::driver::options::OPT_fopenmp_target_debug))
res.getLangOpts().OpenMPTargetDebug = 1;
}
if (args.hasArg(clang::driver::options::OPT_nogpulib))
res.getLangOpts().NoGPULib = 1;
}

switch (llvm::Triple(res.getTargetOpts().triple).getArch()) {
Expand Down
1 change: 1 addition & 0 deletions flang/test/Driver/driver-help-hidden.f90
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@
! CHECK-NEXT: --no-offload-arch=<value>
! CHECK-NEXT: Remove CUDA/HIP offloading device architecture (e.g. sm_35, gfx906) from the list of devices to compile for. 'all' resets the list to its default value.
! CHECK-NEXT: -nocpp Disable predefined and command line preprocessor macros
! CHECK-NEXT: -nogpulib Do not link device library for CUDA/HIP device compilation
! CHECK-NEXT: --offload-arch=<value> Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). If 'native' is used the compiler will detect locally installed architectures. For HIP offloading, the device architecture can be followed by target ID features delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once.
! CHECK-NEXT: --offload-device-only Only compile for the offloading device.
! CHECK-NEXT: --offload-host-device Compile for both the offloading host and device (default).
Expand Down
2 changes: 2 additions & 0 deletions flang/test/Driver/driver-help.f90
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@
! HELP-NEXT: --no-offload-arch=<value>
! HELP-NEXT: Remove CUDA/HIP offloading device architecture (e.g. sm_35, gfx906) from the list of devices to compile for. 'all' resets the list to its default value.
! HELP-NEXT: -nocpp Disable predefined and command line preprocessor macros
! HELP-NEXT: -nogpulib Do not link device library for CUDA/HIP device compilation
! HELP-NEXT: --offload-arch=<value> Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). If 'native' is used the compiler will detect locally installed architectures. For HIP offloading, the device architecture can be followed by target ID features delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once.
! HELP-NEXT: --offload-device-only Only compile for the offloading device.
! HELP-NEXT: --offload-host-device Compile for both the offloading host and device (default).
Expand Down Expand Up @@ -249,6 +250,7 @@
! HELP-FC1-NEXT: -mvscale-max=<value> Specify the vscale maximum. Defaults to the vector length agnostic value of "0". (AArch64/RISC-V only)
! HELP-FC1-NEXT: -mvscale-min=<value> Specify the vscale minimum. Defaults to "1". (AArch64/RISC-V only)
! HELP-FC1-NEXT: -nocpp Disable predefined and command line preprocessor macros
! HELP-FC1-NEXT: -nogpulib Do not link device library for CUDA/HIP device compilation
! HELP-FC1-NEXT: -opt-record-file <value>
! HELP-FC1-NEXT: File name to use for YAML optimization record output
! HELP-FC1-NEXT: -opt-record-format <value>
Expand Down
12 changes: 12 additions & 0 deletions flang/test/Lower/OpenMP/nogpulib.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
!REQUIRES: amdgpu-registered-target

!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s
!RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-is-gpu -emit-hlfir -o - %s | FileCheck %s
!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp -fopenmp-is-target-device -nogpulib %s -o - | FileCheck %s -check-prefix=FLAG_SET
!RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-is-gpu -emit-hlfir -nogpulib -o - %s | FileCheck %s -check-prefix=FLAG_SET

!CHECK-NOT: module attributes {{{.*}}no_gpu_lib
!FLAG_SET: module attributes {{{.*}}no_gpu_lib = true
subroutine omp_subroutine()
end subroutine omp_subroutine

8 changes: 7 additions & 1 deletion flang/tools/bbc/bbc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,12 @@ static llvm::cl::opt<bool> setOpenMPNoNestedParallelism(
"a parallel region."),
llvm::cl::init(false));

static llvm::cl::opt<bool>
setNoGPULib("nogpulib",
llvm::cl::desc("Do not link device library for CUDA/HIP device "
"compilation"),
llvm::cl::init(false));

static llvm::cl::opt<bool> enableOpenACC("fopenacc",
llvm::cl::desc("enable openacc"),
llvm::cl::init(false));
Expand Down Expand Up @@ -349,7 +355,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR(
OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription,
setOpenMPThreadSubscription, setOpenMPNoThreadState,
setOpenMPNoNestedParallelism, enableOpenMPDevice,
enableOpenMPGPU, setOpenMPVersion);
enableOpenMPGPU, setOpenMPVersion, "", setNoGPULib);
setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts);
setOpenMPVersionAttribute(mlirModule, setOpenMPVersion);
}
Expand Down
1 change: 1 addition & 0 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def FlagsAttr : OpenMP_Attr<"Flags", "flags"> {
DefaultValuedParameter<"bool", "false">:$assume_threads_oversubscription,
DefaultValuedParameter<"bool", "false">:$assume_no_thread_state,
DefaultValuedParameter<"bool", "false">:$assume_no_nested_parallelism,
DefaultValuedParameter<"bool", "false">:$no_gpu_lib,
DefaultValuedParameter<"uint32_t", "50">:$openmp_device_version
);

Expand Down
5 changes: 3 additions & 2 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td
Original file line number Diff line number Diff line change
Expand Up @@ -198,11 +198,12 @@ def OffloadModuleInterface : OpInterface<"OffloadModuleInterface"> {
"bool":$assumeThreadsOversubscription,
"bool":$assumeNoThreadState,
"bool":$assumeNoNestedParallelism,
"uint32_t":$openmpDeviceVersion), [{}], [{
"uint32_t":$openmpDeviceVersion,
"bool":$noGPULib), [{}], [{
$_op->setAttr(("omp." + mlir::omp::FlagsAttr::getMnemonic()).str(),
mlir::omp::FlagsAttr::get($_op->getContext(), debugKind,
assumeTeamsOversubscription, assumeThreadsOversubscription,
assumeNoThreadState, assumeNoNestedParallelism, openmpDeviceVersion));
assumeNoThreadState, assumeNoNestedParallelism, noGPULib, openmpDeviceVersion));
}]>,
InterfaceMethod<
/*description=*/[{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2035,6 +2035,12 @@ LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,

llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();

ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
attribute.getOpenmpDeviceVersion());

if (attribute.getNoGpuLib())
return success();

ompBuilder->createGlobalFlag(
attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/,
"__omp_rtl_debug_kind");
Expand All @@ -2056,8 +2062,6 @@ LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute,
.getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/
,
"__omp_rtl_assume_no_nested_parallelism");
ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device",
attribute.getOpenmpDeviceVersion());
return success();
}

Expand Down
6 changes: 6 additions & 0 deletions mlir/test/Dialect/OpenMP/attr.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true,
// CHECK: module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, assume_no_thread_state = true, openmp_device_version = 51>} {
module attributes {omp.flags = #omp.flags<assume_no_thread_state = true, assume_teams_oversubscription = true, openmp_device_version = 51>} {}

// CHECK: module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, assume_no_thread_state = true, no_gpu_lib = true, openmp_device_version = 51>} {
module attributes {omp.flags = #omp.flags<assume_no_thread_state = true, assume_teams_oversubscription = true, no_gpu_lib = true, openmp_device_version = 51>} {}

// CHECK: module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, openmp_device_version = 51>} {
module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, no_gpu_lib = false, openmp_device_version = 51>} {}

// CHECK: module attributes {omp.version = #omp.version<version = 51>} {
module attributes {omp.version = #omp.version<version = 51>} {}

Expand Down
10 changes: 10 additions & 0 deletions mlir/test/Target/LLVMIR/openmp-llvm.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -2530,6 +2530,16 @@ module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true,

// -----

// CHECK-NOT: @__omp_rtl_debug_kind = weak_odr hidden constant i32 0
// CHECK-NOT: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden constant i32 1
// CHECK-NOT: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden constant i32 0
// CHECK-NOT: @__omp_rtl_assume_no_thread_state = weak_odr hidden constant i32 1
// CHECK-NOT: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden constant i32 0
module attributes {omp.flags = #omp.flags<assume_teams_oversubscription = true, assume_no_thread_state = true,
no_gpu_lib=true>} {}

// -----

module attributes {omp.is_target_device = false} {
// CHECK: define void @filter_nohost
llvm.func @filter_nohost() -> ()
Expand Down

0 comments on commit f443fbc

Please sign in to comment.