Skip to content

Commit

Permalink
[HIP][Clang][Driver] Add Driver support for hipstdpar
Browse files Browse the repository at this point in the history
This patch adds the Driver changes needed for enabling HIP parallel algorithm offload on AMDGPU targets. What this change does can be summed up as follows:

- add two flags, one for enabling `hipstdpar` compilation, the second enabling the optional allocation interposition mode;
- the flags correspond to new LangOpt members;
- if we are compiling or linking with --hipstdpar, we enable HIP; in the compilation case C and C++ inputs are treated as HIP inputs;
- the ROCm / AMDGPU driver is augmented to look for and include an implementation detail forwarding header; we error out if the user requested `hipstdpar` but the header or its dependencies cannot be found.

Tests for the behaviour described above are also added.

Reviewed by: MaskRay, yaxunl

Differential Revision: https://reviews.llvm.org/D155775
  • Loading branch information
AlexVlx committed Oct 3, 2023
1 parent 481df27 commit 9a40858
Show file tree
Hide file tree
Showing 12 changed files with 141 additions and 2 deletions.
10 changes: 10 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,16 @@ def err_drv_no_rocm_device_lib : Error<
def err_drv_no_hip_runtime : Error<
"cannot find HIP runtime; provide its path via '--rocm-path', or pass "
"'-nogpuinc' to build without HIP runtime">;
// Driver errors for HIP Standard Parallelism (hipstdpar) support. Each message
// names the driver option that supplies the location of the missing component.

// The HIP Standard Parallelism Acceleration library itself could not be found.
def err_drv_no_hipstdpar_lib : Error<
"cannot find HIP Standard Parallelism Acceleration library; provide it via "
"'--hipstdpar-path'">;
// rocThrust, a dependency of the acceleration library, could not be found.
def err_drv_no_hipstdpar_thrust_lib : Error<
"cannot find rocThrust, which is required by the HIP Standard Parallelism "
"Acceleration library; provide it via "
"'--hipstdpar-thrust-path'">;
// rocPrim, a dependency of the acceleration library, could not be found.
def err_drv_no_hipstdpar_prim_lib : Error<
"cannot find rocPrim, which is required by the HIP Standard Parallelism "
"Acceleration library; provide it via '--hipstdpar-prim-path'">;

def err_drv_no_hipspv_device_lib : Error<
"cannot find HIP device library%select{| for %1}0; provide its path via "
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,8 @@ ENUM_LANGOPT(SYCLVersion , SYCLMajorVersion, 2, SYCL_None, "Version of the SYCL

LANGOPT(HIPUseNewLaunchAPI, 1, 0, "Use new kernel launching API for HIP")
LANGOPT(OffloadUniformBlock, 1, 0, "Assume that kernels are launched with uniform block sizes (default true for CUDA/HIP and false otherwise)")
LANGOPT(HIPStdPar, 1, 0, "Enable Standard Parallel Algorithm Acceleration for HIP (experimental)")
LANGOPT(HIPStdParInterposeAlloc, 1, 0, "Replace allocations / deallocations with HIP RT calls when Standard Parallel Algorithm Acceleration for HIP is enabled (Experimental)")

LANGOPT(SizedDeallocation , 1, 0, "sized deallocation")
LANGOPT(AlignedAllocation , 1, 0, "aligned allocation")
Expand Down
26 changes: 26 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -1258,6 +1258,32 @@ def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group<hip_Group>,
HelpText<"ROCm installation path, used for finding and automatically linking required bitcode libraries.">;
def hip_path_EQ : Joined<["--"], "hip-path=">, Group<hip_Group>,
HelpText<"HIP runtime installation path, used for finding HIP version and adding HIP include path.">;
// --hipstdpar: visible to both the clang driver and cc1; marshalled directly
// into the HIPStdPar language option.
def hipstdpar : Flag<["--"], "hipstdpar">,
Visibility<[ClangOption, CC1Option]>,
Group<CompileOnly_Group>,
HelpText<"Enable HIP acceleration for standard parallel algorithms">,
MarshallingInfoFlag<LangOpts<"HIPStdPar">>;
// --hipstdpar-interpose-alloc: visible to both the clang driver and cc1;
// marshalled directly into the HIPStdParInterposeAlloc language option.
// The help text names the HIP runtime entry points as they are actually
// spelled (the managed allocator is hipMallocManaged).
def hipstdpar_interpose_alloc : Flag<["--"], "hipstdpar-interpose-alloc">,
Visibility<[ClangOption, CC1Option]>,
Group<CompileOnly_Group>,
HelpText<"Replace all memory allocation / deallocation calls with "
"hipMallocManaged / hipFree equivalents">,
MarshallingInfoFlag<LangOpts<"HIPStdParInterposeAlloc">>;
// Location options for the hipstdpar forwarding header and its dependencies.
// These are joined options ("--name=<path>") placed in the include-path group.
// TODO: use MarshallingInfo here
// --hipstdpar-path=<dir>: directory containing the library header.
def hipstdpar_path_EQ : Joined<["--"], "hipstdpar-path=">, Group<i_Group>,
HelpText<
"HIP Standard Parallel Algorithm Acceleration library path, used for "
"finding and implicitly including the library header">;
// --hipstdpar-thrust-path=<dir>: location of rocThrust.
def hipstdpar_thrust_path_EQ : Joined<["--"], "hipstdpar-thrust-path=">,
Group<i_Group>,
HelpText<
"rocThrust path, required by the HIP Standard Parallel Algorithm "
"Acceleration library, used to implicitly include the rocThrust library">;
// --hipstdpar-prim-path=<dir>: location of rocPrim.
def hipstdpar_prim_path_EQ : Joined<["--"], "hipstdpar-prim-path=">,
Group<i_Group>,
HelpText<
"rocPrim path, required by the HIP Standard Parallel Algorithm "
"Acceleration library, used to implicitly include the rocPrim library">;
def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, Group<hip_Group>,
HelpText<"ROCm device library path. Alternative to rocm-path.">;
def : Joined<["--"], "hip-device-lib-path=">, Alias<rocm_device_lib_path_EQ>;
Expand Down
12 changes: 11 additions & 1 deletion clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,7 +767,8 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
[](std::pair<types::ID, const llvm::opt::Arg *> &I) {
return types::isHIP(I.first);
}) ||
C.getInputArgs().hasArg(options::OPT_hip_link);
C.getInputArgs().hasArg(options::OPT_hip_link) ||
C.getInputArgs().hasArg(options::OPT_hipstdpar);
if (IsCuda && IsHIP) {
Diag(clang::diag::err_drv_mix_cuda_hip);
return;
Expand Down Expand Up @@ -2705,6 +2706,10 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args,
}
}

if ((Ty == types::TY_C || Ty == types::TY_CXX) &&
Args.hasArgNoClaim(options::OPT_hipstdpar))
Ty = types::TY_HIP;

if (DiagnoseInputExistence(Args, Value, Ty, /*TypoCorrect=*/true))
Inputs.push_back(std::make_pair(Ty, A));

Expand Down Expand Up @@ -3915,6 +3920,11 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args,
phases::ID FinalPhase = getFinalPhase(Args, &FinalPhaseArg);

if (FinalPhase == phases::Link) {
if (Args.hasArgNoClaim(options::OPT_hipstdpar)) {
Args.AddFlagArg(nullptr, getOpts().getOption(options::OPT_hip_link));
Args.AddFlagArg(nullptr,
getOpts().getOption(options::OPT_frtlib_add_rpath));
}
// Emitting LLVM while linking disabled except in HIPAMD Toolchain
if (Args.hasArg(options::OPT_emit_llvm) && !Args.hasArg(options::OPT_hip_link))
Diag(clang::diag::err_drv_emit_llvm_link);
Expand Down
56 changes: 55 additions & 1 deletion clang/lib/Driver/ToolChains/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,20 @@ RocmInstallationDetector::RocmInstallationDetector(
RocmDeviceLibPathArg =
Args.getAllArgValues(clang::driver::options::OPT_rocm_device_lib_path_EQ);
HIPPathArg = Args.getLastArgValue(clang::driver::options::OPT_hip_path_EQ);
HIPStdParPathArg =
Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_path_EQ);
HasHIPStdParLibrary =
!HIPStdParPathArg.empty() && D.getVFS().exists(HIPStdParPathArg +
"/hipstdpar_lib.hpp");
HIPRocThrustPathArg =
Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_thrust_path_EQ);
HasRocThrustLibrary = !HIPRocThrustPathArg.empty() &&
D.getVFS().exists(HIPRocThrustPathArg + "/thrust");
HIPRocPrimPathArg =
Args.getLastArgValue(clang::driver::options::OPT_hipstdpar_prim_path_EQ);
HasRocPrimLibrary = !HIPRocPrimPathArg.empty() &&
D.getVFS().exists(HIPRocPrimPathArg + "/rocprim");

if (auto *A = Args.getLastArg(clang::driver::options::OPT_hip_version_EQ)) {
HIPVersionArg = A->getValue();
unsigned Major = ~0U;
Expand Down Expand Up @@ -507,6 +521,7 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
ArgStringList &CC1Args) const {
bool UsesRuntimeWrapper = VersionMajorMinor > llvm::VersionTuple(3, 5) &&
!DriverArgs.hasArg(options::OPT_nohipwrapperinc);
bool HasHipStdPar = DriverArgs.hasArg(options::OPT_hipstdpar);

if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
// HIP header includes standard library wrapper headers under clang
Expand All @@ -529,8 +544,45 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
CC1Args.push_back(DriverArgs.MakeArgString(P));
}

if (DriverArgs.hasArg(options::OPT_nogpuinc))
const auto HandleHipStdPar = [=, &DriverArgs, &CC1Args]() {
if (!hasHIPStdParLibrary()) {
D.Diag(diag::err_drv_no_hipstdpar_lib);
return;
}
if (!HasRocThrustLibrary &&
!D.getVFS().exists(getIncludePath() + "/thrust")) {
D.Diag(diag::err_drv_no_hipstdpar_thrust_lib);
return;
}
if (!HasRocPrimLibrary &&
!D.getVFS().exists(getIncludePath() + "/rocprim")) {
D.Diag(diag::err_drv_no_hipstdpar_prim_lib);
return;
}

const char *ThrustPath;
if (HasRocThrustLibrary)
ThrustPath = DriverArgs.MakeArgString(HIPRocThrustPathArg);
else
ThrustPath = DriverArgs.MakeArgString(getIncludePath() + "/thrust");

const char *PrimPath;
if (HasRocPrimLibrary)
PrimPath = DriverArgs.MakeArgString(HIPRocPrimPathArg);
else
PrimPath = DriverArgs.MakeArgString(getIncludePath() + "/rocprim");

CC1Args.append({"-idirafter", ThrustPath, "-idirafter", PrimPath,
"-idirafter", DriverArgs.MakeArgString(HIPStdParPathArg),
"-include", "hipstdpar_lib.hpp"});
};

if (DriverArgs.hasArg(options::OPT_nogpuinc)) {
if (HasHipStdPar)
HandleHipStdPar();

return;
}

if (!hasHIPRuntime()) {
D.Diag(diag::err_drv_no_hip_runtime);
Expand All @@ -541,6 +593,8 @@ void RocmInstallationDetector::AddHIPIncludeArgs(const ArgList &DriverArgs,
CC1Args.push_back(DriverArgs.MakeArgString(getIncludePath()));
if (UsesRuntimeWrapper)
CC1Args.append({"-include", "__clang_hip_runtime_wrapper.h"});
if (HasHipStdPar)
HandleHipStdPar();
}

void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6580,6 +6580,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back("-fhip-new-launch-api");
Args.addOptInFlag(CmdArgs, options::OPT_fgpu_allow_device_init,
options::OPT_fno_gpu_allow_device_init);
Args.AddLastArg(CmdArgs, options::OPT_hipstdpar);
Args.AddLastArg(CmdArgs, options::OPT_hipstdpar_interpose_alloc);
Args.addOptInFlag(CmdArgs, options::OPT_fhip_kernel_arg_name,
options::OPT_fno_hip_kernel_arg_name);
}
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Driver/ToolChains/HIPAMD.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,8 @@ void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
"--no-undefined",
"-shared",
"-plugin-opt=-amdgpu-internalize-symbols"};
if (Args.hasArg(options::OPT_hipstdpar))
LldArgs.push_back("-plugin-opt=-amdgpu-enable-hipstdpar");

auto &TC = getToolChain();
auto &D = TC.getDriver();
Expand Down Expand Up @@ -242,6 +244,8 @@ void HIPAMDToolChain::addClangTargetOptions(
if (!DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
false))
CC1Args.append({"-mllvm", "-amdgpu-internalize-symbols"});
if (DriverArgs.hasArgNoClaim(options::OPT_hipstdpar))
CC1Args.append({"-mllvm", "-amdgpu-enable-hipstdpar"});

StringRef MaxThreadsPerBlock =
DriverArgs.getLastArgValue(options::OPT_gpu_max_threads_per_block_EQ);
Expand Down
13 changes: 13 additions & 0 deletions clang/lib/Driver/ToolChains/ROCm.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ class RocmInstallationDetector {
const Driver &D;
bool HasHIPRuntime = false;
bool HasDeviceLibrary = false;
bool HasHIPStdParLibrary = false;
bool HasRocThrustLibrary = false;
bool HasRocPrimLibrary = false;

// Default version if not detected or specified.
const unsigned DefaultVersionMajor = 3;
Expand All @@ -96,6 +99,13 @@ class RocmInstallationDetector {
std::vector<std::string> RocmDeviceLibPathArg;
// HIP runtime path specified by --hip-path.
StringRef HIPPathArg;
// HIP Standard Parallel Algorithm acceleration library specified by
// --hipstdpar-path
StringRef HIPStdParPathArg;
// rocThrust algorithm library specified by --hipstdpar-thrust-path
StringRef HIPRocThrustPathArg;
// rocPrim algorithm library specified by --hipstdpar-prim-path
StringRef HIPRocPrimPathArg;
// HIP version specified by --hip-version.
StringRef HIPVersionArg;
// Whether -nogpulib is specified.
Expand Down Expand Up @@ -180,6 +190,9 @@ class RocmInstallationDetector {
/// Check whether we detected a valid ROCm device library.
bool hasDeviceLibrary() const { return HasDeviceLibrary; }

/// Check whether we detected a valid HIP STDPAR Acceleration library.
/// Simply reports the cached HasHIPStdParLibrary flag; no filesystem lookup is
/// performed by this call.
bool hasHIPStdParLibrary() const { return HasHIPStdParLibrary; }

/// Print information about the detected ROCm installation.
void print(raw_ostream &OS) const;

Expand Down
Empty file.
Empty file.
Empty file.
18 changes: 18 additions & 0 deletions clang/test/Driver/hipstdpar.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// RUN: not %clang -### --hipstdpar -nogpulib -nogpuinc --compile %s 2>&1 | \
// RUN: FileCheck --check-prefix=HIPSTDPAR-MISSING-LIB %s
// RUN: %clang -### --hipstdpar --hipstdpar-path=%S/Inputs/hipstdpar \
// RUN: --hipstdpar-thrust-path=%S/Inputs/hipstdpar/thrust \
// RUN: --hipstdpar-prim-path=%S/Inputs/hipstdpar/rocprim \
// RUN: -nogpulib -nogpuinc --compile %s 2>&1 | \
// RUN: FileCheck --check-prefix=HIPSTDPAR-COMPILE %s
// RUN: touch %t.o
// RUN: %clang -### --hipstdpar %t.o 2>&1 | FileCheck --check-prefix=HIPSTDPAR-LINK %s

// HIPSTDPAR-MISSING-LIB: error: cannot find HIP Standard Parallelism Acceleration library; provide it via '--hipstdpar-path'
// HIPSTDPAR-COMPILE: "-x" "hip"
// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/thrust}}"
// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/rocprim}}"
// HIPSTDPAR-COMPILE: "-idirafter" "{{.*/Inputs/hipstdpar}}"
// HIPSTDPAR-COMPILE: "-include" "hipstdpar_lib.hpp"
// HIPSTDPAR-LINK: "-rpath"
// HIPSTDPAR-LINK: "-l{{.*hip.*}}"

0 comments on commit 9a40858

Please sign in to comment.