Skip to content

Commit

Permalink
AMDGPU: Teach toolchain to link rocm device libs
Browse files Browse the repository at this point in the history
Currently the library is linked separately, but that is not sufficient to
implement fast math flags correctly. Each module should get the
version of the library appropriate for its combination of fast math
and related flags, with the attributes propagated into its functions
and internalized.

HIP already maintains the list of libraries, but this is not used for
OpenCL. Unfortunately, HIP uses a separate --hip-device-lib argument,
despite both languages using the same bitcode library. Eventually
these two searches need to be merged.

An additional problem is that there are 3 different locations where the
libraries are installed, depending on which build is used. This also needs to be
consolidated (or at least the search logic needs to deal with this
unnecessary complexity).
  • Loading branch information
arsenm committed Apr 10, 2020
1 parent f5be71b commit 4593e41
Show file tree
Hide file tree
Showing 34 changed files with 630 additions and 14 deletions.
8 changes: 8 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ def err_drv_no_cuda_installation : Error<
def err_drv_no_cuda_libdevice : Error<
"cannot find libdevice for %0. Provide path to different CUDA installation "
"via --cuda-path, or pass -nocudalib to build without linking with libdevice.">;

// Reported by the ROCm toolchain when device libraries are requested (no
// -nogpulib) but no valid ROCm installation was detected.
def err_drv_no_rocm_installation : Error<
"cannot find ROCm installation. Provide its path via --rocm-path, or pass "
"-nogpulib.">;
// Reported when a ROCm installation was found but it has no device library
// for the requested GPU. %0 is the GPU architecture name as given to -mcpu.
def err_drv_no_rocm_device_lib : Error<
"cannot find device library for %0. Provide path to different ROCm installation "
"via --rocm-path, or pass -nogpulib to build without linking default libraries.">;

def err_drv_cuda_version_unsupported : Error<
"GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), "
"but installation at %3 is %4. Use --cuda-path to specify a different CUDA "
Expand Down
2 changes: 2 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,8 @@ def : Flag<["-"], "fno-cuda-rdc">, Alias<fno_gpu_rdc>;
def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>,
HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">;
def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">;
// --rocm-path=<dir>: when given, the driver's ROCm installation detection
// uses this directory as its only candidate instead of searching relative to
// the compiler binary and under <sysroot>/opt/rocm.
def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group<i_Group>,
HelpText<"ROCm installation path">;
def hip_device_lib_path_EQ : Joined<["--"], "hip-device-lib-path=">, Group<Link_Group>,
HelpText<"HIP device library path">;
def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group<Link_Group>,
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4857,6 +4857,8 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
TC = std::make_unique<toolchains::Solaris>(*this, Target, Args);
break;
case llvm::Triple::AMDHSA:
TC = std::make_unique<toolchains::ROCMToolChain>(*this, Target, Args);
break;
case llvm::Triple::AMDPAL:
case llvm::Triple::Mesa3D:
TC = std::make_unique<toolchains::AMDGPUToolChain>(*this, Target, Args);
Expand Down
251 changes: 250 additions & 1 deletion clang/lib/Driver/ToolChains/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,171 @@
#include "clang/Driver/Compilation.h"
#include "clang/Driver/DriverDiagnostic.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/VirtualFileSystem.h"

using namespace clang::driver;
using namespace clang::driver::tools;
using namespace clang::driver::toolchains;
using namespace clang;
using namespace llvm::opt;

/// Locate a ROCm installation and record the device bitcode libraries found
/// in it.
///
/// Candidate directories are, in order:
///   - the value of --rocm-path=, if given (and nothing else), otherwise
///   - a directory derived from the location of the clang binary, and
///   - <sysroot>/opt/rocm.
///
/// For each candidate the "<candidate>/lib" directory is scanned for files
/// ending in ".amdgcn.bc". The first candidate that provides the required
/// libraries (or simply exists, when -nogpulib is given) is accepted and
/// IsValid is set.
///
/// \param D          Driver used for the VFS, sysroot and install directory.
/// \param HostTriple Host triple; its arch name is compared against the
///                   directory that contains the compiler binary directory.
/// \param Args       Driver arguments (--rocm-path=, -nogpulib).
RocmInstallationDetector::RocmInstallationDetector(
    const Driver &D, const llvm::Triple &HostTriple,
    const llvm::opt::ArgList &Args)
    : D(D) {
  struct Candidate {
    std::string Path;
    bool StrictChecking;

    Candidate(std::string Path, bool StrictChecking = false)
        : Path(Path), StrictChecking(StrictChecking) {}
  };

  SmallVector<Candidate, 4> Candidates;

  if (Args.hasArg(clang::driver::options::OPT_rocm_path_EQ)) {
    // An explicit --rocm-path disables every other search location.
    Candidates.emplace_back(
        Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ).str());
  } else {
    // Try to find relative to the compiler binary.
    const char *InstallDir = D.getInstalledDir();

    // Check both a normal Unix prefix position of the clang binary, as well as
    // the Windows-esque layout the ROCm packages use with the host architecture
    // subdirectory of bin.

    // NOTE(review): ParentDir is a full path here, so the comparisons below
    // only match when that path is literally the arch name / "bin". Comparing
    // llvm::sys::path::filename(ParentDir) may be what was intended - confirm
    // before changing, since it alters which installations are detected.
    StringRef ParentDir = llvm::sys::path::parent_path(InstallDir);
    if (ParentDir == HostTriple.getArchName())
      ParentDir = llvm::sys::path::parent_path(ParentDir);

    if (ParentDir == "bin") {
      Candidates.emplace_back(llvm::sys::path::parent_path(ParentDir).str(),
                              /*StrictChecking=*/true);
    }

    Candidates.emplace_back(D.SysRoot + "/opt/rocm");
  }

  bool NoBuiltinLibs = Args.hasArg(options::OPT_nogpulib);

  for (const auto &Candidate : Candidates) {
    InstallPath = Candidate.Path;
    if (InstallPath.empty() || !D.getVFS().exists(InstallPath))
      continue;

    // FIXME: The install path situation is a real mess.

    // For a cmake install, these are placed directly in
    // ${INSTALL_PREFIX}/lib

    // In the separate OpenCL builds, the bitcode libraries are placed in
    // ${OPENCL_ROOT}/lib/x86_64/bitcode/*

    // For the rocm installed packages, these are placed at
    // /opt/rocm/opencl/lib/x86_64/bitcode

    // An additional copy is installed, in scattered locations between
    // /opt/rocm/hcc/rocdl/oclc
    // /opt/rocm/hcc/rocdl/ockl
    // /opt/rocm/hcc/rocdl/lib
    //
    // Yet another complete set is installed to
    // /opt/rocm/hcc/rocdl/lib

    // For now just recognize the opencl package layout.

    // path::append() extends whatever is already stored, so drop anything
    // left over from a previously rejected candidate first. Without this the
    // second candidate would be probed at "<first>/lib/<second>/lib".
    IncludePath.clear();
    LibDevicePath.clear();

    // BinPath = InstallPath + "/bin";
    llvm::sys::path::append(IncludePath, InstallPath, "include");
    llvm::sys::path::append(LibDevicePath, InstallPath, "lib");

    auto &FS = D.getVFS();

    // We don't need the include path for OpenCL, since clang already ships with
    // the default header.

    bool CheckLibDevice = (!NoBuiltinLibs || Candidate.StrictChecking);
    if (CheckLibDevice && !FS.exists(LibDevicePath))
      continue;

    const StringRef Suffix(".amdgcn.bc");

    // NOTE(review): libraries recorded while scanning a candidate that is
    // later rejected are not reset before the next candidate is scanned.
    std::error_code EC;
    for (llvm::sys::fs::directory_iterator LI(LibDevicePath, EC), LE;
         !EC && LI != LE; LI = LI.increment(EC)) {
      StringRef FilePath = LI->path();
      StringRef FileName = llvm::sys::path::filename(FilePath);
      if (!FileName.endswith(Suffix))
        continue;

      StringRef BaseName = FileName.drop_back(Suffix.size());

      if (BaseName == "ocml") {
        OCML = FilePath;
      } else if (BaseName == "ockl") {
        OCKL = FilePath;
      } else if (BaseName == "opencl") {
        OpenCL = FilePath;
      } else if (BaseName == "hip") {
        HIP = FilePath;
      } else if (BaseName == "oclc_finite_only_off") {
        FiniteOnly.Off = FilePath;
      } else if (BaseName == "oclc_finite_only_on") {
        FiniteOnly.On = FilePath;
      } else if (BaseName == "oclc_daz_opt_on") {
        DenormalsAreZero.On = FilePath;
      } else if (BaseName == "oclc_daz_opt_off") {
        DenormalsAreZero.Off = FilePath;
      } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
        CorrectlyRoundedSqrt.On = FilePath;
      } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
        CorrectlyRoundedSqrt.Off = FilePath;
      } else if (BaseName == "oclc_unsafe_math_on") {
        UnsafeMath.On = FilePath;
      } else if (BaseName == "oclc_unsafe_math_off") {
        UnsafeMath.Off = FilePath;
      } else if (BaseName == "oclc_wavefrontsize64_on") {
        WavefrontSize64.On = FilePath;
      } else if (BaseName == "oclc_wavefrontsize64_off") {
        WavefrontSize64.Off = FilePath;
      } else {
        // Process all bitcode filenames that look like
        // oclc_isa_version_XXX.amdgcn.bc
        const StringRef DeviceLibPrefix = "oclc_isa_version_";
        if (!BaseName.startswith(DeviceLibPrefix))
          continue;

        StringRef IsaVersionNumber =
            BaseName.drop_front(DeviceLibPrefix.size());

        // Build the "gfxNNN" key inside a single full-expression. A Twine
        // only references its operands, so keeping one in a named local
        // would leave it pointing at the destroyed Twine("gfx") temporary.
        SmallString<8> Tmp;
        LibDeviceMap.insert(std::make_pair(
            (Twine("gfx") + IsaVersionNumber).toStringRef(Tmp),
            FilePath.str()));
      }
    }

    if (!NoBuiltinLibs) {
      // Check that the required non-target libraries are all available.
      if (!allGenericLibsValid())
        continue;

      // Check that we have found at least one libdevice that we can link in if
      // -nogpulib hasn't been specified.
      if (LibDeviceMap.empty())
        continue;
    }

    IsValid = true;
    break;
  }
}

/// Write a one-line description of the detected installation to \p OS.
/// Nothing is written when no valid installation was found.
void RocmInstallationDetector::print(raw_ostream &OS) const {
  if (!isValid())
    return;

  OS << "Found ROCm installation: " << InstallPath << '\n';
}

void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
const InputInfo &Output,
const InputInfoList &Inputs,
Expand Down Expand Up @@ -142,6 +299,12 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
llvm::DenormalMode::getIEEE();
}

/// ROCM Toolchain
///
/// Constructs the AMDGPUToolChain base and the RocmInstallation member from
/// the same driver, triple and arguments.
ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
                             const ArgList &Args)
    : AMDGPUToolChain(D, Triple, Args), RocmInstallation(D, Triple, Args) {}

void AMDGPUToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
Expand All @@ -155,3 +318,89 @@ void AMDGPUToolChain::addClangTargetOptions(
CC1Args.push_back("-fapply-global-visibility-to-externs");
}
}

/// Add the cc1 options for a ROCm compilation.
///
/// After adding the generic AMDGPU options, this links the ROCm device
/// bitcode libraries via -mlink-builtin-bitcode unless -nogpulib was given.
/// The library variants are selected from the -mcpu value, the wavefront size
/// flags and the OpenCL math flags.
///
/// Diagnoses err_drv_no_rocm_installation when no valid installation was
/// detected, and err_drv_no_rocm_device_lib when the installation has no
/// device library for the requested GPU. No libraries are added in either
/// case.
void ROCMToolChain::addClangTargetOptions(
    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
    Action::OffloadKind DeviceOffloadingKind) const {
  AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
                                         DeviceOffloadingKind);

  if (DriverArgs.hasArg(options::OPT_nogpulib))
    return;

  if (!RocmInstallation.isValid()) {
    getDriver().Diag(diag::err_drv_no_rocm_installation);
    return;
  }

  // Get the device name and canonicalize it
  const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
  auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
  const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
  std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
  if (LibDeviceFile.empty()) {
    getDriver().Diag(diag::err_drv_no_rocm_device_lib) << GpuArch;
    return;
  }

  // This must be recomputed on every call: it depends on the GPU selected by
  // the current arguments. A function-local static would be initialized once
  // and then reused for later invocations that target a different GPU.
  const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
  const bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32) != 0;

  // Targets without wave32 support always use wave64; otherwise wave64 is
  // used only when explicitly requested.
  bool Wave64 = !HasWave32 || DriverArgs.hasFlag(
    options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);

  // TODO: There are way too many flags that change this. Do we need to check
  // them all?
  bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
             getDefaultDenormsAreZeroForTarget(Kind);
  bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);

  bool UnsafeMathOpt =
      DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
  bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
  bool CorrectSqrt =
      DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);

  // Add the OpenCL specific bitcode library.
  CC1Args.push_back("-mlink-builtin-bitcode");
  CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getOpenCLPath()));

  // Add the generic set of libraries.
  RocmInstallation.addCommonBitcodeLibCC1Args(
      DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly,
      UnsafeMathOpt, FastRelaxedMath, CorrectSqrt);
}

/// Append "-mlink-builtin-bitcode <path>" pairs to \p CC1Args for the set of
/// device libraries shared by the languages that use them.
///
/// The libraries are added in this order: OCML, OCKL, the denormals-are-zero
/// control, the unsafe-math control, the finite-only control, the
/// correctly-rounded-sqrt control, the wavefront-size control, and finally
/// \p LibDeviceFile.
///
/// \param DriverArgs      Used to allocate the argument strings.
/// \param CC1Args         Argument list that receives the new options.
/// \param LibDeviceFile   Path of the target-specific device library.
/// \param Wave64          Selector passed to getWavefrontSize64Path().
/// \param DAZ             Selector passed to getDenormalsAreZeroPath().
/// \param FiniteOnly      Together with \p FastRelaxedMath, selector passed to
///                        getFiniteOnlyPath().
/// \param UnsafeMathOpt   Together with \p FastRelaxedMath, selector passed to
///                        getUnsafeMathPath().
/// \param FastRelaxedMath See \p FiniteOnly and \p UnsafeMathOpt.
/// \param CorrectSqrt     Selector passed to getCorrectlyRoundedSqrtPath().
void RocmInstallationDetector::addCommonBitcodeLibCC1Args(
    const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
    StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly,
    bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt) const {
  // Emits one flag/path pair for a single bitcode library.
  auto LinkBuiltin = [&DriverArgs, &CC1Args](StringRef BitcodePath) {
    CC1Args.push_back("-mlink-builtin-bitcode");
    CC1Args.push_back(DriverArgs.MakeArgString(BitcodePath));
  };

  const bool WantUnsafeMath = UnsafeMathOpt || FastRelaxedMath;
  const bool WantFiniteOnly = FiniteOnly || FastRelaxedMath;

  LinkBuiltin(getOCMLPath());
  LinkBuiltin(getOCKLPath());
  LinkBuiltin(getDenormalsAreZeroPath(DAZ));
  LinkBuiltin(getUnsafeMathPath(WantUnsafeMath));
  LinkBuiltin(getFiniteOnlyPath(WantFiniteOnly));
  LinkBuiltin(getCorrectlyRoundedSqrtPath(CorrectSqrt));
  LinkBuiltin(getWavefrontSize64Path(Wave64));
  LinkBuiltin(LibDeviceFile);
}

0 comments on commit 4593e41

Please sign in to comment.