Skip to content

Commit

Permalink
[Clang][OpenMP] Add partial support for Static Device Libraries
Browse files Browse the repository at this point in the history
An archive containing device code object files can be passed to
clang command line for linking. For each given offload target
it creates a device specific archives which is either passed to llvm-link
if the target is amdgpu, or to clang-nvlink-wrapper if the target is
nvptx. -L/-l flags are used to specify these fat archives on the command
line. E.g.
  clang++ -fopenmp -fopenmp-targets=nvptx64 main.cpp -L. -lmylib

It currently doesn't support linking an archive directly, like:
  clang++ -fopenmp -fopenmp-targets=nvptx64 main.cpp libmylib.a

Linking with x86 offload also does not work.

Reviewed By: ye-luo

Differential Revision: https://reviews.llvm.org/D105191
  • Loading branch information
saiislam committed Oct 7, 2021
1 parent 393581d commit 4c41170
Show file tree
Hide file tree
Showing 9 changed files with 551 additions and 5 deletions.
4 changes: 4 additions & 0 deletions clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
Expand Up @@ -114,6 +114,10 @@ const char *AMDGCN::OpenMPLinker::constructLLVMLinkCommand(
}
}

AddStaticDeviceLibsLinking(C, *this, JA, Inputs, Args, CmdArgs, "amdgcn",
SubArchName,
/* bitcode SDL?*/ true,
/* PostClang Link? */ false);
// Add an intermediate output file.
CmdArgs.push_back("-o");
const char *OutputFileName =
Expand Down
37 changes: 34 additions & 3 deletions clang/lib/Driver/ToolChains/Clang.cpp
Expand Up @@ -7734,12 +7734,28 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA,
Triples += Action::GetOffloadKindName(CurKind);
Triples += '-';
Triples += CurTC->getTriple().normalize();
if ((CurKind == Action::OFK_HIP || CurKind == Action::OFK_OpenMP ||
CurKind == Action::OFK_Cuda) &&
if ((CurKind == Action::OFK_HIP || CurKind == Action::OFK_Cuda) &&
CurDep->getOffloadingArch()) {
Triples += '-';
Triples += CurDep->getOffloadingArch();
}

// TODO: Replace parsing of -march flag. Can be done by storing GPUArch
// with each toolchain.
StringRef GPUArchName;
if (CurKind == Action::OFK_OpenMP) {
// Extract GPUArch from -march argument in TC argument list.
for (unsigned ArgIndex = 0; ArgIndex < TCArgs.size(); ArgIndex++) {
auto ArchStr = StringRef(TCArgs.getArgString(ArgIndex));
auto Arch = ArchStr.startswith_insensitive("-march=");
if (Arch) {
GPUArchName = ArchStr.substr(7);
Triples += "-";
break;
}
}
Triples += GPUArchName.str();
}
}
CmdArgs.push_back(TCArgs.MakeArgString(Triples));

Expand Down Expand Up @@ -7813,12 +7829,27 @@ void OffloadBundler::ConstructJobMultipleOutputs(
Triples += '-';
Triples += Dep.DependentToolChain->getTriple().normalize();
if ((Dep.DependentOffloadKind == Action::OFK_HIP ||
Dep.DependentOffloadKind == Action::OFK_OpenMP ||
Dep.DependentOffloadKind == Action::OFK_Cuda) &&
!Dep.DependentBoundArch.empty()) {
Triples += '-';
Triples += Dep.DependentBoundArch;
}
// TODO: Replace parsing of -march flag. Can be done by storing GPUArch
// with each toolchain.
StringRef GPUArchName;
if (Dep.DependentOffloadKind == Action::OFK_OpenMP) {
// Extract GPUArch from -march argument in TC argument list.
for (uint ArgIndex = 0; ArgIndex < TCArgs.size(); ArgIndex++) {
StringRef ArchStr = StringRef(TCArgs.getArgString(ArgIndex));
auto Arch = ArchStr.startswith_insensitive("-march=");
if (Arch) {
GPUArchName = ArchStr.substr(7);
Triples += "-";
break;
}
}
Triples += GPUArchName.str();
}
}

CmdArgs.push_back(TCArgs.MakeArgString(Triples));
Expand Down
287 changes: 287 additions & 0 deletions clang/lib/Driver/ToolChains/CommonArgs.cpp
Expand Up @@ -34,6 +34,7 @@
#include "clang/Driver/Util.h"
#include "clang/Driver/XRayArgs.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
Expand Down Expand Up @@ -1587,6 +1588,292 @@ void tools::addX86AlignBranchArgs(const Driver &D, const ArgList &Args,
}
}

/// SDLSearch: Search for Static Device Library
/// The search for SDL bitcode files is consistent with how static host
/// libraries are discovered. That is, the -l option triggers a search for
/// files in a set of directories called the LINKPATH. The host library search
/// procedure looks for a specific filename in the LINKPATH. The filename for
/// a host library is lib<libname>.a or lib<libname>.so. For SDLs, there is an
/// ordered-set of filenames that are searched. We call this ordered-set of
/// filenames as SEARCH-ORDER. Since an SDL can either be device-type specific,
/// architecture specific, or generic across all architectures, a naming
/// convention and search order is used where the file name embeds the
/// architecture name <arch-name> (nvptx or amdgcn) and the GPU device type
/// <device-name> such as sm_30 and gfx906. <device-name> is absent in case of
/// device-independent SDLs. To reduce congestion in host library directories,
/// the search first looks for files in the “libdevice” subdirectory. SDLs that
/// are bc files begin with the prefix “lib”.
///
/// Machine-code SDLs can also be managed as an archive (*.a file). The
/// convention has been to use the prefix “lib”. To avoid confusion with host
/// archive libraries, we use prefix "libbc-" for the bitcode SDL archives.
///
bool tools::SDLSearch(const Driver &D, const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
SmallVector<std::string, 8> LibraryPaths, std::string Lib,
StringRef Arch, StringRef Target, bool isBitCodeSDL,
bool postClangLink) {
SmallVector<std::string, 12> SDLs;

std::string LibDeviceLoc = "/libdevice";
std::string LibBcPrefix = "/libbc-";
std::string LibPrefix = "/lib";

if (isBitCodeSDL) {
// SEARCH-ORDER for Bitcode SDLs:
// libdevice/libbc-<libname>-<arch-name>-<device-type>.a
// libbc-<libname>-<arch-name>-<device-type>.a
// libdevice/libbc-<libname>-<arch-name>.a
// libbc-<libname>-<arch-name>.a
// libdevice/libbc-<libname>.a
// libbc-<libname>.a
// libdevice/lib<libname>-<arch-name>-<device-type>.bc
// lib<libname>-<arch-name>-<device-type>.bc
// libdevice/lib<libname>-<arch-name>.bc
// lib<libname>-<arch-name>.bc
// libdevice/lib<libname>.bc
// lib<libname>.bc

for (StringRef Base : {LibBcPrefix, LibPrefix}) {
const auto *Ext = Base.contains(LibBcPrefix) ? ".a" : ".bc";

for (auto Suffix : {Twine(Lib + "-" + Arch + "-" + Target).str(),
Twine(Lib + "-" + Arch).str(), Twine(Lib).str()}) {
SDLs.push_back(Twine(LibDeviceLoc + Base + Suffix + Ext).str());
SDLs.push_back(Twine(Base + Suffix + Ext).str());
}
}
} else {
// SEARCH-ORDER for Machine-code SDLs:
// libdevice/lib<libname>-<arch-name>-<device-type>.a
// lib<libname>-<arch-name>-<device-type>.a
// libdevice/lib<libname>-<arch-name>.a
// lib<libname>-<arch-name>.a

const auto *Ext = ".a";

for (auto Suffix : {Twine(Lib + "-" + Arch + "-" + Target).str(),
Twine(Lib + "-" + Arch).str()}) {
SDLs.push_back(Twine(LibDeviceLoc + LibPrefix + Suffix + Ext).str());
SDLs.push_back(Twine(LibPrefix + Suffix + Ext).str());
}
}

// The CUDA toolchain does not use a global device llvm-link before the LLVM
// backend generates ptx. So currently, the use of bitcode SDL for nvptx is
// only possible with post-clang-cc1 linking. Clang cc1 has a feature that
// will link libraries after clang compilation while the LLVM IR is still in
// memory. This utilizes a clang cc1 option called “-mlink-builtin-bitcode”.
// This is a clang -cc1 option that is generated by the clang driver. The
// option value must a full path to an existing file.
bool FoundSDL = false;
for (auto LPath : LibraryPaths) {
for (auto SDL : SDLs) {
auto FullName = Twine(LPath + SDL).str();
if (llvm::sys::fs::exists(FullName)) {
if (postClangLink)
CC1Args.push_back("-mlink-builtin-bitcode");
CC1Args.push_back(DriverArgs.MakeArgString(FullName));
FoundSDL = true;
break;
}
}
if (FoundSDL)
break;
}
return FoundSDL;
}

/// Search if a user provided archive file lib<libname>.a exists in any of
/// the library paths. If so, add a new command to clang-offload-bundler to
/// unbundle this archive and create a temporary device specific archive. Name
/// of this SDL is passed to the llvm-link (for amdgcn) or to the
/// clang-nvlink-wrapper (for nvptx) commands by the driver.
bool tools::GetSDLFromOffloadArchive(
Compilation &C, const Driver &D, const Tool &T, const JobAction &JA,
const InputInfoList &Inputs, const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args, SmallVector<std::string, 8> LibraryPaths,
StringRef Lib, StringRef Arch, StringRef Target, bool isBitCodeSDL,
bool postClangLink) {

// We don't support bitcode archive bundles for nvptx
if (isBitCodeSDL && Arch.contains("nvptx"))
return false;

bool FoundAOB = false;
SmallVector<std::string, 2> AOBFileNames;
std::string ArchiveOfBundles;
for (auto LPath : LibraryPaths) {
ArchiveOfBundles.clear();

AOBFileNames.push_back(Twine(LPath + "/libdevice/lib" + Lib + ".a").str());
AOBFileNames.push_back(Twine(LPath + "/lib" + Lib + ".a").str());

for (auto AOB : AOBFileNames) {
if (llvm::sys::fs::exists(AOB)) {
ArchiveOfBundles = AOB;
FoundAOB = true;
break;
}
}

if (!FoundAOB)
continue;

StringRef Prefix = isBitCodeSDL ? "libbc-" : "lib";
std::string OutputLib = D.GetTemporaryPath(
Twine(Prefix + Lib + "-" + Arch + "-" + Target).str(), "a");

C.addTempFile(C.getArgs().MakeArgString(OutputLib.c_str()));

ArgStringList CmdArgs;
SmallString<128> DeviceTriple;
DeviceTriple += Action::GetOffloadKindName(JA.getOffloadingDeviceKind());
DeviceTriple += '-';
std::string NormalizedTriple = T.getToolChain().getTriple().normalize();
DeviceTriple += NormalizedTriple;
if (!Target.empty()) {
DeviceTriple += '-';
DeviceTriple += Target;
}

std::string UnbundleArg("-unbundle");
std::string TypeArg("-type=a");
std::string InputArg("-inputs=" + ArchiveOfBundles);
std::string OffloadArg("-targets=" + std::string(DeviceTriple));
std::string OutputArg("-outputs=" + OutputLib);

const char *UBProgram = DriverArgs.MakeArgString(
T.getToolChain().GetProgramPath("clang-offload-bundler"));

ArgStringList UBArgs;
UBArgs.push_back(C.getArgs().MakeArgString(UnbundleArg.c_str()));
UBArgs.push_back(C.getArgs().MakeArgString(TypeArg.c_str()));
UBArgs.push_back(C.getArgs().MakeArgString(InputArg.c_str()));
UBArgs.push_back(C.getArgs().MakeArgString(OffloadArg.c_str()));
UBArgs.push_back(C.getArgs().MakeArgString(OutputArg.c_str()));

// Add this flag to not exit from clang-offload-bundler if no compatible
// code object is found in heterogenous archive library.
std::string AdditionalArgs("-allow-missing-bundles");
UBArgs.push_back(C.getArgs().MakeArgString(AdditionalArgs.c_str()));

C.addCommand(std::make_unique<Command>(
JA, T, ResponseFileSupport::AtFileCurCP(), UBProgram, UBArgs, Inputs,
InputInfo(&JA, C.getArgs().MakeArgString(OutputLib.c_str()))));
if (postClangLink)
CC1Args.push_back("-mlink-builtin-bitcode");

CC1Args.push_back(DriverArgs.MakeArgString(OutputLib));
break;
}

return FoundAOB;
}

// Wrapper function used by driver for adding SDLs during link phase.
void tools::AddStaticDeviceLibsLinking(Compilation &C, const Tool &T,
const JobAction &JA,
const InputInfoList &Inputs,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
StringRef Arch, StringRef Target,
bool isBitCodeSDL, bool postClangLink) {
AddStaticDeviceLibs(&C, &T, &JA, &Inputs, C.getDriver(), DriverArgs, CC1Args,
Arch, Target, isBitCodeSDL, postClangLink);
}

// Wrapper function used for post clang linking of bitcode SDLS for nvptx by
// the CUDA toolchain.
void tools::AddStaticDeviceLibsPostLinking(const Driver &D,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
StringRef Arch, StringRef Target,
bool isBitCodeSDL, bool postClangLink) {
AddStaticDeviceLibs(nullptr, nullptr, nullptr, nullptr, D, DriverArgs,
CC1Args, Arch, Target, isBitCodeSDL, postClangLink);
}

// User defined Static Device Libraries(SDLs) can be passed to clang for
// offloading GPU compilers. Like static host libraries, the use of a SDL is
// specified with the -l command line option. The primary difference between
// host and SDLs is the filenames for SDLs (refer SEARCH-ORDER for Bitcode SDLs
// and SEARCH-ORDER for Machine-code SDLs for the naming convention).
// SDLs are of following types:
//
// * Bitcode SDLs: They can either be a *.bc file or an archive of *.bc files.
// For NVPTX, these libraries are post-clang linked following each
// compilation. For AMDGPU, these libraries are linked one time
// during the application link phase.
//
// * Machine-code SDLs: They are archive files. For NVPTX, the archive members
// contain cubin for Nvidia GPUs and are linked one time during the
// link phase by the CUDA SDK linker called nvlink. For AMDGPU, the
// process for machine code SDLs is still in development. But they
// will be linked by the LLVM tool lld.
//
// * Bundled objects that contain both host and device codes: Bundled objects
// may also contain library code compiled from source. For NVPTX, the
// bundle contains cubin. For AMDGPU, the bundle contains bitcode.
//
// For Bitcode and Machine-code SDLs, current compiler toolchains hardcode the
// inclusion of specific SDLs such as math libraries and the OpenMP device
// library libomptarget.
void tools::AddStaticDeviceLibs(Compilation *C, const Tool *T,
const JobAction *JA,
const InputInfoList *Inputs, const Driver &D,
const llvm::opt::ArgList &DriverArgs,
llvm::opt::ArgStringList &CC1Args,
StringRef Arch, StringRef Target,
bool isBitCodeSDL, bool postClangLink) {

SmallVector<std::string, 8> LibraryPaths;
// Add search directories from LIBRARY_PATH env variable
llvm::Optional<std::string> LibPath =
llvm::sys::Process::GetEnv("LIBRARY_PATH");
if (LibPath) {
SmallVector<StringRef, 8> Frags;
const char EnvPathSeparatorStr[] = {llvm::sys::EnvPathSeparator, '\0'};
llvm::SplitString(*LibPath, Frags, EnvPathSeparatorStr);
for (StringRef Path : Frags)
LibraryPaths.emplace_back(Path.trim());
}

// Add directories from user-specified -L options
for (std::string Search_Dir : DriverArgs.getAllArgValues(options::OPT_L))
LibraryPaths.emplace_back(Search_Dir);

// Add path to lib-debug folders
SmallString<256> DefaultLibPath = llvm::sys::path::parent_path(D.Dir);
llvm::sys::path::append(DefaultLibPath, Twine("lib") + CLANG_LIBDIR_SUFFIX);
LibraryPaths.emplace_back(DefaultLibPath.c_str());

// Build list of Static Device Libraries SDLs specified by -l option
llvm::SmallSet<std::string, 16> SDLNames;
static const StringRef HostOnlyArchives[] = {
"omp", "cudart", "m", "gcc", "gcc_s", "pthread", "hip_hcc"};
for (auto SDLName : DriverArgs.getAllArgValues(options::OPT_l)) {
if (!HostOnlyArchives->contains(SDLName)) {
SDLNames.insert(SDLName);
}
}

// The search stops as soon as an SDL file is found. The driver then provides
// the full filename of the SDL to the llvm-link or clang-nvlink-wrapper
// command. If no SDL is found after searching each LINKPATH with
// SEARCH-ORDER, it is possible that an archive file lib<libname>.a exists
// and may contain bundled object files.
for (auto SDLName : SDLNames) {
// This is the only call to SDLSearch
if (!SDLSearch(D, DriverArgs, CC1Args, LibraryPaths, SDLName, Arch, Target,
isBitCodeSDL, postClangLink)) {
GetSDLFromOffloadArchive(*C, D, *T, *JA, *Inputs, DriverArgs, CC1Args,
LibraryPaths, SDLName, Arch, Target,
isBitCodeSDL, postClangLink);
}
}
}

static llvm::opt::Arg *
getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
// The last of -mcode-object-v3, -mno-code-object-v3 and
Expand Down

0 comments on commit 4c41170

Please sign in to comment.