diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 2a48c063e243e..91a95def4f80d 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -70,6 +70,16 @@ def err_drv_no_rocm_device_lib : Error< def err_drv_no_hip_runtime : Error< "cannot find HIP runtime; provide its path via '--rocm-path', or pass " "'-nogpuinc' to build without HIP runtime">; +def err_drv_no_hipstdpar_lib : Error< + "cannot find HIP Standard Parallelism Acceleration library; provide it via " + "'--hipstdpar-path'">; +def err_drv_no_hipstdpar_thrust_lib : Error< + "cannot find rocThrust, which is required by the HIP Standard Parallelism " + "Acceleration library; provide it via " + "'--hipstdpar-thrust-path'">; +def err_drv_no_hipstdpar_prim_lib : Error< + "cannot find rocPrim, which is required by the HIP Standard Parallelism " + "Acceleration library; provide it via '--hipstdpar-prim-path'">; def err_drv_no_hipspv_device_lib : Error< "cannot find HIP device library%select{| for %1}0; provide its path via " diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 28c9bcec3ee60..c0ea4ecb9806a 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -280,6 +280,8 @@ ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP") LANGOPT(OffloadUniformBlock, 1, 0, "Assume that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)") +LANGOPT(HIPStdPar, 1, 0, "Enable Standard Parallel Algorithm Acceleration for HIP (experimental)") +LANGOPT(HIPStdParInterposeAlloc, 1, 0, "Replace allocations / deallocations with HIP RT calls when Standard Parallel Algorithm Acceleration for HIP is enabled (Experimental)") LANGOPT(SizedDeallocation , 1, 0, "sized deallocation") LANGOPT(AlignedAllocation , 1, 0, "aligned allocation") diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index ee4e23f335e78..ff2130c93f28e 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1258,6 +1258,32 @@ def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group, HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">; def hip_path_EQ : Joined<["--"], "hip-path=">, Group, HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">; +def hipstdpar : Flag<["--"], "hipstdpar">, + Visibility<[ClangOption, CC1Option]>, + Group, + HelpText<"Enable HIP acceleration for standard parallel algorithms">, + MarshallingInfoFlag>; +def hipstdpar_interpose_alloc : Flag<["--"], "hipstdpar-interpose-alloc">, + Visibility<[ClangOption, CC1Option]>, + Group, + HelpText<"Replace all memory allocation / deallocation calls with " + "hipManagedMalloc / hipFree equivalents">, + MarshallingInfoFlag>; +// TODO: use MarshallingInfo here +def hipstdpar_path_EQ : Joined<["--"], "hipstdpar-path=">, Group, + HelpText< + "HIP Standard Parallel Algorithm Acceleration library path, used for " + "finding and implicitly including the library header">; +def hipstdpar_thrust_path_EQ : Joined<["--"], "hipstdpar-thrust-path=">, + Group, + HelpText< + "rocThrust path, required by the HIP Standard Parallel Algorithm " + "Acceleration library, used to implicitly include the rocThrust library">; +def hipstdpar_prim_path_EQ : Joined<["--"], "hipstdpar-prim-path=">, + Group, + HelpText< + "rocPrim path, required by the HIP Standard Parallel Algorithm " + "Acceleration library, used to implicitly include the rocPrim library">; def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group, HelpText<"ROCm device library path. Alternative to rocm-path.">; def : Joined<["--"], "hip-device-lib-path=">, Alias; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 84b8fc7685fed..77328e1f99e50 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -767,7 +767,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, [](std::pair &I) { return types::isHIP(I.first); }) || - C.getInputArgs().hasArg(options::OPT_hip_link); + C.getInputArgs().hasArg(options::OPT_hip_link) || + C.getInputArgs().hasArg(options::OPT_hipstdpar); if (IsCuda && IsHIP) { Diag(clang::diag::err_drv_mix_cuda_hip); return; @@ -2705,6 +2706,10 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, } } + if ((Ty == types::TY_C || Ty == types::TY_CXX) && + Args.hasArgNoClaim(options::OPT_hipstdpar)) + Ty = types::TY_HIP; + if (DiagnoseInputExistence(Args, Value, Ty, /*TypoCorrect=*/true)) Inputs.push_back(std::make_pair(Ty, A)); @@ -3915,6 +3920,11 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args, phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg); if (FinalPhase == phases::Link) { + if (Args.hasArgNoClaim(options::OPT_hipstdpar)) { + Args.AddFlagArg(nullptr, getOpts().getOption(options::OPT_hip_link)); + Args.AddFlagArg(nullptr, + getOpts().getOption(options::OPT_frtlib_add_rpath)); + } // Emitting LLVM while linking disabled except in HIPAMD Toolchain if (Args.hasArg(options::OPT_emit_llvm) && !Args.hasArg(options::OPT_hip_link)) Diag(clang::diag::err_drv_emit_llvm_link); diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index 3f08c0ef5d6f0..d4b33ad551c43 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -329,6 +329,20 @@ RocmInstallationDetector::RocmInstallationDetector( RocmDeviceLibPathArg = Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ); HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ); + HIPStdParPathArg = + Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_path_EQ); + HasHIPStdParLibrary = + !HIPStdParPathArg.empty() && D.getVFS().exists(HIPStdParPathArg + + "/hipstdpar_lib.hpp"); + HIPRocThrustPathArg = + Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_thrust_path_EQ); + HasRocThrustLibrary = !HIPRocThrustPathArg.empty() && + D.getVFS().exists(HIPRocThrustPathArg + "/thrust"); + HIPRocPrimPathArg = + Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_prim_path_EQ); + HasRocPrimLibrary = !HIPRocPrimPathArg.empty() && + D.getVFS().exists(HIPRocPrimPathArg + "/rocprim"); + if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) { HIPVersionArg = A->getValue(); unsigned Major = ~0U; @@ -507,6 +521,7 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) && !DriverArgs.hasArg(options::OPT_nohipwrapperinc); + bool HasHipStdPar = DriverArgs.hasArg(options::OPT_hipstdpar); if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { // HIP header includes standard library wrapper headers under clang @@ -529,8 +544,45 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs, CC1Args.push_back(DriverArgs.MakeArgString(P)); } - if (DriverArgs.hasArg(options::OPT_nogpuinc)) + const auto HandleHipStdPar = [=, &DriverArgs, &CC1Args]() { + if (!hasHIPStdParLibrary()) { + D.Diag(diag::err_drv_no_hipstdpar_lib); + return; + } + if (!HasRocThrustLibrary && + !D.getVFS().exists(getIncludePath() + "/thrust")) { + D.Diag(diag::err_drv_no_hipstdpar_thrust_lib); + return; + } + if (!HasRocPrimLibrary && + !D.getVFS().exists(getIncludePath() + "/rocprim")) { + D.Diag(diag::err_drv_no_hipstdpar_prim_lib); + return; + } + + const char *ThrustPath; + if (HasRocThrustLibrary) + ThrustPath = DriverArgs.MakeArgString(HIPRocThrustPathArg); + else + ThrustPath = DriverArgs.MakeArgString(getIncludePath() + "/thrust"); + + const char *PrimPath; + if (HasRocPrimLibrary) + PrimPath = DriverArgs.MakeArgString(HIPRocPrimPathArg); + else + PrimPath = DriverArgs.MakeArgString(getIncludePath() + "/rocprim"); + + CC1Args.append({"-idirafter", ThrustPath, "-idirafter", PrimPath, + "-idirafter", DriverArgs.MakeArgString(HIPStdParPathArg), + "-include", "hipstdpar_lib.hpp"}); + }; + + if (DriverArgs.hasArg(options::OPT_nogpuinc)) { + if (HasHipStdPar) + HandleHipStdPar(); + return; + } if (!hasHIPRuntime()) { D.Diag(diag::err_drv_no_hip_runtime); @@ -541,6 +593,8 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs, CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath())); if (UsesRuntimeWrapper) CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"}); + if (HasHipStdPar) + HandleHipStdPar(); } void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index a694d00b569a5..129adfb9fcc74 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6580,6 +6580,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fhip-new-launch-api"); Args.addOptInFlag(CmdArgs, options::OPT_fgpu_allow_device_init, options::OPT_fno_gpu_allow_device_init); + Args.AddLastArg(CmdArgs, options::OPT_hipstdpar); + Args.AddLastArg(CmdArgs, options::OPT_hipstdpar_interpose_alloc); Args.addOptInFlag(CmdArgs, options::OPT_fhip_kernel_arg_name, options::OPT_fno_hip_kernel_arg_name); } diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index 3fc5669c06c39..ccb36a6c846c8 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -113,6 +113,8 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA, "--no-undefined", "-shared", "-plugin-opt=-amdgpu-internalize-symbols"}; + if (Args.hasArg(options::OPT_hipstdpar)) + LldArgs.push_back("-plugin-opt=-amdgpu-enable-hipstdpar"); auto &TC = getToolChain(); auto &D = TC.getDriver(); @@ -242,6 +244,8 @@ void HIPAMDToolChain::addClangTargetOptions( if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"}); + if (DriverArgs.hasArgNoClaim(options::OPT_hipstdpar)) + CC1Args.append({"-mllvm", "-amdgpu-enable-hipstdpar"}); StringRef MaxThreadsPerBlock = DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ); diff --git a/clang/lib/Driver/ToolChains/ROCm.h b/clang/lib/Driver/ToolChains/ROCm.h index 554d8a6929ac5..dceb0ab036693 100644 --- a/clang/lib/Driver/ToolChains/ROCm.h +++ b/clang/lib/Driver/ToolChains/ROCm.h @@ -77,6 +77,9 @@ class RocmInstallationDetector { const Driver &D; bool HasHIPRuntime = false; bool HasDeviceLibrary = false; + bool HasHIPStdParLibrary = false; + bool HasRocThrustLibrary = false; + bool HasRocPrimLibrary = false; // Default version if not detected or specified. const unsigned DefaultVersionMajor = 3; @@ -96,6 +99,13 @@ class RocmInstallationDetector { std::vector RocmDeviceLibPathArg; // HIP runtime path specified by --hip-path. StringRef HIPPathArg; + // HIP Standard Parallel Algorithm acceleration library specified by + // --hipstdpar-path + StringRef HIPStdParPathArg; + // rocThrust algorithm library specified by --hipstdpar-thrust-path + StringRef HIPRocThrustPathArg; + // rocPrim algorithm library specified by --hipstdpar-prim-path + StringRef HIPRocPrimPathArg; // HIP version specified by --hip-version. StringRef HIPVersionArg; // Wheter -nogpulib is specified. @@ -180,6 +190,9 @@ class RocmInstallationDetector { /// Check whether we detected a valid ROCm device library. bool hasDeviceLibrary() const { return HasDeviceLibrary; } + /// Check whether we detected a valid HIP STDPAR Acceleration library. + bool hasHIPStdParLibrary() const { return HasHIPStdParLibrary; } + /// Print information about the detected ROCm installation. void print(raw_ostream &OS) const; diff --git a/clang/test/Driver/Inputs/hipstdpar/hipstdpar_lib.hpp b/clang/test/Driver/Inputs/hipstdpar/hipstdpar_lib.hpp new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/hipstdpar/rocprim/.keep b/clang/test/Driver/Inputs/hipstdpar/rocprim/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/Inputs/hipstdpar/thrust/.keep b/clang/test/Driver/Inputs/hipstdpar/thrust/.keep new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/hipstdpar.c b/clang/test/Driver/hipstdpar.c new file mode 100644 index 0000000000000..b4fd815d9a764 --- /dev/null +++ b/clang/test/Driver/hipstdpar.c @@ -0,0 +1,18 @@ +// RUN: not %clang -### --hipstdpar -nogpulib -nogpuinc --compile %s 2>&1 | \ +// RUN: FileCheck --check-prefix=HIPSTDPAR-MISSING-LIB %s +// RUN: %clang -### --hipstdpar --hipstdpar-path=%S/Inputs/hipstdpar \ +// RUN: --hipstdpar-thrust-path=%S/Inputs/hipstdpar/thrust \ +// RUN: --hipstdpar-prim-path=%S/Inputs/hipstdpar/rocprim \ +// RUN: -nogpulib -nogpuinc --compile %s 2>&1 | \ +// RUN: FileCheck --check-prefix=HIPSTDPAR-COMPILE %s +// RUN: touch %t.o +// RUN: %clang -### --hipstdpar %t.o 2>&1 | FileCheck --check-prefix=HIPSTDPAR-LINK %s + +// HIPSTDPAR-MISSING-LIB: error: cannot find HIP Standard Parallelism Acceleration library; provide it via '--hipstdpar-path' +// HIPSTDPAR-COMPILE: "-x" "hip" +// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/thrust}}" +// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/rocprim}}" +// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/Inputs/hipstdpar}}" +// HIPSTDPAR-COMPILE: "-include" "hipstdpar_lib.hpp" +// HIPSTDPAR-LINK: "-rpath" +// HIPSTDPAR-LINK: "-l{{.*hip.*}}"