diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 84648c6d55008..a76e8dcff148b 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5197,7 +5197,7 @@ def nohipwrapperinc : Flag<["-"], "nohipwrapperinc">, Group, HelpText<"Do not include the default HIP wrapper headers and include paths">; def : Flag<["-"], "nocudainc">, Alias; def nogpulib : Flag<["-"], "nogpulib">, MarshallingInfoFlag>, - Visibility<[ClangOption, CC1Option]>, + Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, HelpText<"Do not link device library for CUDA/HIP device compilation">; def : Flag<["-"], "nocudalib">, Alias; def gpulibc : Flag<["-"], "gpulibc">, Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 41eaad3bbad0a..5d2fc6cb028e2 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -428,6 +428,8 @@ void Flang::addOffloadOptions(Compilation &C, const InputInfoList &Inputs, CmdArgs.push_back("-fopenmp-assume-no-thread-state"); if (Args.hasArg(options::OPT_fopenmp_assume_no_nested_parallelism)) CmdArgs.push_back("-fopenmp-assume-no-nested-parallelism"); + if (Args.hasArg(options::OPT_nogpulib)) + CmdArgs.push_back("-nogpulib"); } } diff --git a/flang/include/flang/Frontend/LangOptions.def b/flang/include/flang/Frontend/LangOptions.def index 3a1d44f7fb472..2bf10826120a8 100644 --- a/flang/include/flang/Frontend/LangOptions.def +++ b/flang/include/flang/Frontend/LangOptions.def @@ -21,6 +21,8 @@ LANGOPT(Name, Bits, Default) ENUM_LANGOPT(FPContractMode, FPModeKind, 2, FPM_Fast) ///< FP Contract Mode (off/fast) +/// Indicate a build without the standard GPU libraries. +LANGOPT(NoGPULib , 1, false) /// Permit floating point optimization without regard to infinities LANGOPT(NoHonorInfs, 1, false) /// Permit floating point optimization without regard to NaN diff --git a/flang/include/flang/Tools/CrossToolHelpers.h b/flang/include/flang/Tools/CrossToolHelpers.h index b346b30b158ae..b61224ff4f1b3 100644 --- a/flang/include/flang/Tools/CrossToolHelpers.h +++ b/flang/include/flang/Tools/CrossToolHelpers.h @@ -56,14 +56,16 @@ struct OffloadModuleOpts { OffloadModuleOpts(uint32_t OpenMPTargetDebug, bool OpenMPTeamSubscription, bool OpenMPThreadSubscription, bool OpenMPNoThreadState, bool OpenMPNoNestedParallelism, bool OpenMPIsTargetDevice, - bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {}) + bool OpenMPIsGPU, uint32_t OpenMPVersion, std::string OMPHostIRFile = {}, + bool NoGPULib = false) : OpenMPTargetDebug(OpenMPTargetDebug), OpenMPTeamSubscription(OpenMPTeamSubscription), OpenMPThreadSubscription(OpenMPThreadSubscription), OpenMPNoThreadState(OpenMPNoThreadState), OpenMPNoNestedParallelism(OpenMPNoNestedParallelism), OpenMPIsTargetDevice(OpenMPIsTargetDevice), OpenMPIsGPU(OpenMPIsGPU), - OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile) {} + OpenMPVersion(OpenMPVersion), OMPHostIRFile(OMPHostIRFile), + NoGPULib(NoGPULib) {} OffloadModuleOpts(Fortran::frontend::LangOptions &Opts) : OpenMPTargetDebug(Opts.OpenMPTargetDebug), @@ -73,7 +75,7 @@ struct OffloadModuleOpts { OpenMPNoNestedParallelism(Opts.OpenMPNoNestedParallelism), OpenMPIsTargetDevice(Opts.OpenMPIsTargetDevice), OpenMPIsGPU(Opts.OpenMPIsGPU), OpenMPVersion(Opts.OpenMPVersion), - OMPHostIRFile(Opts.OMPHostIRFile) {} + OMPHostIRFile(Opts.OMPHostIRFile), NoGPULib(Opts.NoGPULib) {} uint32_t OpenMPTargetDebug = 0; bool OpenMPTeamSubscription = false; @@ -84,6 +86,7 @@ struct OffloadModuleOpts { bool OpenMPIsGPU = false; uint32_t OpenMPVersion = 11; std::string OMPHostIRFile = {}; + bool NoGPULib = false; }; // Shares assinging of the OpenMP OffloadModuleInterface and its assorted @@ -98,7 +101,7 @@ void setOffloadModuleInterfaceAttributes( if (Opts.OpenMPIsTargetDevice) { offloadMod.setFlags(Opts.OpenMPTargetDebug, Opts.OpenMPTeamSubscription, Opts.OpenMPThreadSubscription, Opts.OpenMPNoThreadState, - Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion); + Opts.OpenMPNoNestedParallelism, Opts.OpenMPVersion, Opts.NoGPULib); if (!Opts.OMPHostIRFile.empty()) offloadMod.setHostIRFilePath(Opts.OMPHostIRFile); diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index b65b6e31bea82..0732f4bef290f 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -935,6 +935,8 @@ static bool parseDialectArgs(CompilerInvocation &res, llvm::opt::ArgList &args, args.hasArg(clang::driver::options::OPT_fopenmp_target_debug)) res.getLangOpts().OpenMPTargetDebug = 1; } + if (args.hasArg(clang::driver::options::OPT_nogpulib)) + res.getLangOpts().NoGPULib = 1; } switch (llvm::Triple(res.getTargetOpts().triple).getArch()) { diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90 index 70bb9f8eb512c..ab39dce962c6c 100644 --- a/flang/test/Driver/driver-help-hidden.f90 +++ b/flang/test/Driver/driver-help-hidden.f90 @@ -127,6 +127,7 @@ ! CHECK-NEXT: --no-offload-arch= ! CHECK-NEXT: Remove CUDA/HIP offloading device architecture (e.g. sm_35, gfx906) from the list of devices to compile for. 'all' resets the list to its default value. ! CHECK-NEXT: -nocpp Disable predefined and command line preprocessor macros +! CHECK-NEXT: -nogpulib Do not link device library for CUDA/HIP device compilation ! CHECK-NEXT: --offload-arch= Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). If 'native' is used the compiler will detect locally installed architectures. For HIP offloading, the device architecture can be followed by target ID features delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once. ! CHECK-NEXT: --offload-device-only Only compile for the offloading device. ! CHECK-NEXT: --offload-host-device Compile for both the offloading host and device (default). diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 index 0d760616aace0..c1ec2a028d4b3 100644 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -113,6 +113,7 @@ ! HELP-NEXT: --no-offload-arch= ! HELP-NEXT: Remove CUDA/HIP offloading device architecture (e.g. sm_35, gfx906) from the list of devices to compile for. 'all' resets the list to its default value. ! HELP-NEXT: -nocpp Disable predefined and command line preprocessor macros +! HELP-NEXT: -nogpulib Do not link device library for CUDA/HIP device compilation ! HELP-NEXT: --offload-arch= Specify an offloading device architecture for CUDA, HIP, or OpenMP. (e.g. sm_35). If 'native' is used the compiler will detect locally installed architectures. For HIP offloading, the device architecture can be followed by target ID features delimited by a colon (e.g. gfx908:xnack+:sramecc-). May be specified more than once. ! HELP-NEXT: --offload-device-only Only compile for the offloading device. ! HELP-NEXT: --offload-host-device Compile for both the offloading host and device (default). @@ -249,6 +250,7 @@ ! HELP-FC1-NEXT: -mvscale-max= Specify the vscale maximum. Defaults to the vector length agnostic value of "0". (AArch64/RISC-V only) ! HELP-FC1-NEXT: -mvscale-min= Specify the vscale minimum. Defaults to "1". (AArch64/RISC-V only) ! HELP-FC1-NEXT: -nocpp Disable predefined and command line preprocessor macros +! HELP-FC1-NEXT: -nogpulib Do not link device library for CUDA/HIP device compilation ! HELP-FC1-NEXT: -opt-record-file ! HELP-FC1-NEXT: File name to use for YAML optimization record output ! HELP-FC1-NEXT: -opt-record-format diff --git a/flang/test/Lower/OpenMP/nogpulib.f90 b/flang/test/Lower/OpenMP/nogpulib.f90 new file mode 100644 index 0000000000000..f2e67136ecd74 --- /dev/null +++ b/flang/test/Lower/OpenMP/nogpulib.f90 @@ -0,0 +1,12 @@ +!REQUIRES: amdgpu-registered-target + +!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s +!RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-is-gpu -emit-hlfir -o - %s | FileCheck %s +!RUN: %flang_fc1 -triple amdgcn-amd-amdhsa -emit-hlfir -fopenmp -fopenmp-is-target-device -nogpulib %s -o - | FileCheck %s -check-prefix=FLAG_SET +!RUN: bbc -fopenmp -fopenmp-is-target-device -fopenmp-is-gpu -emit-hlfir -nogpulib -o - %s | FileCheck %s -check-prefix=FLAG_SET + +!CHECK-NOT: module attributes {{{.*}}no_gpu_lib +!FLAG_SET: module attributes {{{.*}}no_gpu_lib = true +subroutine omp_subroutine() +end subroutine omp_subroutine + diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp index 0122cf33b0b67..b4ba837a3263f 100644 --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -181,6 +181,12 @@ static llvm::cl::opt setOpenMPNoNestedParallelism( "a parallel region."), llvm::cl::init(false)); +static llvm::cl::opt + setNoGPULib("nogpulib", + llvm::cl::desc("Do not link device library for CUDA/HIP device " + "compilation"), + llvm::cl::init(false)); + static llvm::cl::opt enableOpenACC("fopenacc", llvm::cl::desc("enable openacc"), llvm::cl::init(false)); @@ -349,7 +355,7 @@ static mlir::LogicalResult convertFortranSourceToMLIR( OffloadModuleOpts(setOpenMPTargetDebug, setOpenMPTeamSubscription, setOpenMPThreadSubscription, setOpenMPNoThreadState, setOpenMPNoNestedParallelism, enableOpenMPDevice, - enableOpenMPGPU, setOpenMPVersion); + enableOpenMPGPU, setOpenMPVersion, "", setNoGPULib); setOffloadModuleInterfaceAttributes(mlirModule, offloadModuleOpts); setOpenMPVersionAttribute(mlirModule, setOpenMPVersion); } diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index b9989b335a2ae..d614f2666a85a 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -65,6 +65,7 @@ def FlagsAttr : OpenMP_Attr<"Flags", "flags"> { DefaultValuedParameter<"bool", "false">:$assume_threads_oversubscription, DefaultValuedParameter<"bool", "false">:$assume_no_thread_state, DefaultValuedParameter<"bool", "false">:$assume_no_nested_parallelism, + DefaultValuedParameter<"bool", "false">:$no_gpu_lib, DefaultValuedParameter<"uint32_t", "50">:$openmp_device_version ); diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td index 77001fc816cf9..89d04af64766f 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOpsInterfaces.td @@ -198,11 +198,12 @@ def OffloadModuleInterface : OpInterface<"OffloadModuleInterface"> { "bool":$assumeThreadsOversubscription, "bool":$assumeNoThreadState, "bool":$assumeNoNestedParallelism, - "uint32_t":$openmpDeviceVersion), [{}], [{ + "uint32_t":$openmpDeviceVersion, + "bool":$noGPULib), [{}], [{ $_op->setAttr(("omp." + mlir::omp::FlagsAttr::getMnemonic()).str(), mlir::omp::FlagsAttr::get($_op->getContext(), debugKind, assumeTeamsOversubscription, assumeThreadsOversubscription, - assumeNoThreadState, assumeNoNestedParallelism, openmpDeviceVersion)); + assumeNoThreadState, assumeNoNestedParallelism, noGPULib, openmpDeviceVersion)); }]>, InterfaceMethod< /*description=*/[{ diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 629584683f499..e7aebc3ce4be5 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2035,6 +2035,12 @@ LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device", + attribute.getOpenmpDeviceVersion()); + + if (attribute.getNoGpuLib()) + return success(); + ompBuilder->createGlobalFlag( attribute.getDebugKind() /*LangOpts().OpenMPTargetDebug*/, "__omp_rtl_debug_kind"); @@ -2056,8 +2062,6 @@ LogicalResult convertFlagsAttr(Operation *op, mlir::omp::FlagsAttr attribute, .getAssumeNoNestedParallelism() /*LangOpts().OpenMPNoNestedParallelism*/ , "__omp_rtl_assume_no_nested_parallelism"); - ompBuilder->M.addModuleFlag(llvm::Module::Max, "openmp-device", - attribute.getOpenmpDeviceVersion()); return success(); } diff --git a/mlir/test/Dialect/OpenMP/attr.mlir b/mlir/test/Dialect/OpenMP/attr.mlir index 0cb6d0a0badd1..a9e4c82fe34aa 100644 --- a/mlir/test/Dialect/OpenMP/attr.mlir +++ b/mlir/test/Dialect/OpenMP/attr.mlir @@ -54,6 +54,12 @@ module attributes {omp.flags = #omp.flags} { module attributes {omp.flags = #omp.flags} {} +// CHECK: module attributes {omp.flags = #omp.flags} { +module attributes {omp.flags = #omp.flags} {} + +// CHECK: module attributes {omp.flags = #omp.flags} { +module attributes {omp.flags = #omp.flags} {} + // CHECK: module attributes {omp.version = #omp.version} { module attributes {omp.version = #omp.version} {} diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 1c02c0265462c..29baa84e7e19d 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -2530,6 +2530,16 @@ module attributes {omp.flags = #omp.flags