Skip to content

Commit

Permalink
[NVPTX] Add support for -march=native in standalone NVPTX (#79373)
Browse files Browse the repository at this point in the history
Summary:
We support `--target=nvptx64-nvidia-cuda` as a way to target the NVPTX
architecture directly, as one would a standard CPU. This patch simply reuses
the existing support for handling `--offload-arch=native` so that
`-march=native` also works with the standalone toolchain.
  • Loading branch information
jhuber6 committed Jan 25, 2024
1 parent c2e5f4d commit 82d335e
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 31 deletions.
65 changes: 39 additions & 26 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -738,9 +738,22 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
if (!llvm::is_contained(*DAL, A))
DAL->append(A);

if (!DAL->hasArg(options::OPT_march_EQ))
if (!DAL->hasArg(options::OPT_march_EQ)) {
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
CudaArchToString(CudaArch::CudaDefault));
} else if (DAL->getLastArgValue(options::OPT_march_EQ) == "native") {
auto GPUsOrErr = getSystemGPUArchs(Args);
if (!GPUsOrErr) {
getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
<< getArchName() << llvm::toString(GPUsOrErr.takeError()) << "-march";
} else {
if (GPUsOrErr->size() > 1)
getDriver().Diag(diag::warn_drv_multi_gpu_arch)
<< getArchName() << llvm::join(*GPUsOrErr, ", ") << "-march";
DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ),
Args.MakeArgString(GPUsOrErr->front()));
}
}

return DAL;
}
Expand Down Expand Up @@ -783,6 +796,31 @@ void NVPTXToolChain::adjustDebugInfoKind(
}
}

/// Detects the NVIDIA GPU architectures available on the system by running
/// the `nvptx-arch` program (or the program given through the
/// nvptx-arch-tool option) and collecting every non-empty line it prints.
///
/// Forwards the error from executeToolChainProgram if running the program
/// fails, and returns a string error if the program printed no architecture.
Expected<SmallVector<std::string>>
NVPTXToolChain::getSystemGPUArchs(const ArgList &Args) const {
  // An explicitly supplied tool overrides the default `nvptx-arch` lookup.
  Arg *ToolOverride = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ);
  std::string ToolPath = ToolOverride ? std::string(ToolOverride->getValue())
                                      : GetProgramPath("nvptx-arch");

  auto OutputOrErr = executeToolChainProgram(ToolPath);
  if (!OutputOrErr)
    return OutputOrErr.takeError();

  // Each non-empty line of the output is recorded as one architecture.
  SmallVector<std::string, 1> Detected;
  for (StringRef Line : llvm::split((*OutputOrErr)->getBuffer(), "\n")) {
    if (Line.empty())
      continue;
    Detected.push_back(Line.str());
  }

  if (!Detected.empty())
    return std::move(Detected);

  return llvm::createStringError(std::error_code(),
                                 "No NVIDIA GPU detected in the system");
}

/// CUDA toolchain. Our assembler is ptxas, and our "linker" is fatbinary,
/// which isn't properly a linker but nonetheless performs the step of stitching
/// together object files from the assembler into a single blob.
Expand Down Expand Up @@ -948,31 +986,6 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
return DAL;
}

/// Detects the NVIDIA GPU architectures available on the system by running
/// the `nvptx-arch` program (or the program given through the
/// nvptx-arch-tool option) and collecting every non-empty line it prints.
///
/// Forwards the error from executeToolChainProgram if running the program
/// fails, and returns a string error if the program printed no architecture.
Expected<SmallVector<std::string>>
CudaToolChain::getSystemGPUArchs(const ArgList &Args) const {
  // An explicitly supplied tool overrides the default `nvptx-arch` lookup.
  Arg *ToolOverride = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ);
  std::string ToolPath = ToolOverride ? std::string(ToolOverride->getValue())
                                      : GetProgramPath("nvptx-arch");

  auto OutputOrErr = executeToolChainProgram(ToolPath);
  if (!OutputOrErr)
    return OutputOrErr.takeError();

  // Each non-empty line of the output is recorded as one architecture.
  SmallVector<std::string, 1> Detected;
  for (StringRef Line : llvm::split((*OutputOrErr)->getBuffer(), "\n")) {
    if (Line.empty())
      continue;
    Detected.push_back(Line.str());
  }

  if (!Detected.empty())
    return std::move(Detected);

  return llvm::createStringError(std::error_code(),
                                 "No NVIDIA GPU detected in the system");
}

/// Creates the assembler tool for this toolchain: a newly allocated
/// tools::NVPTX::Assembler constructed from this toolchain object.
Tool *NVPTXToolChain::buildAssembler() const {
return new tools::NVPTX::Assembler(*this);
}
Expand Down
10 changes: 5 additions & 5 deletions clang/lib/Driver/ToolChains/Cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
/// DWARF version this toolchain uses by default: always 2.
unsigned GetDefaultDwarfVersion() const override { return 2; }
/// Highest DWARF version this toolchain reports: always 2.
unsigned getMaxDwarfVersion() const override { return 2; }

/// Uses the nvptx-arch tool to get the architectures of the system GPUs.
/// Returns an error if the tool cannot be run or reports no GPU.
Expected<SmallVector<std::string>>
getSystemGPUArchs(const llvm::opt::ArgList &Args) const override;

CudaInstallationDetector CudaInstallation;

protected:
Expand Down Expand Up @@ -223,11 +228,6 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain {

const ToolChain &HostTC;

/// Uses the nvptx-arch tool to get the architectures of the system GPUs.
/// Returns an error if the tool cannot be run or reports no GPU.
Expected<SmallVector<std::string>>
getSystemGPUArchs(const llvm::opt::ArgList &Args) const override;

protected:
Tool *buildAssembler() const override; // ptxas
Tool *buildLinker() const override; // fatbinary (ok, not really a linker)
Expand Down
4 changes: 4 additions & 0 deletions clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_89_sm_80
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/sh
# Stand-in for the nvptx-arch tool used by the driver tests: prints two GPU
# architectures, one per line (sm_89 first), and exits successfully.
echo sm_89
echo sm_80
exit 0
13 changes: 13 additions & 0 deletions clang/test/Driver/nvptx-cuda-system-arch.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
// RUN: mkdir -p %t
// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_fail %t/
// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_70 %t/
// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_89_sm_80 %t/
// RUN: echo '#!/bin/sh' > %t/nvptx_arch_empty
// RUN: chmod +x %t/nvptx_arch_fail
// RUN: chmod +x %t/nvptx_arch_sm_70
// RUN: chmod +x %t/nvptx_arch_sm_89_sm_80
// RUN: chmod +x %t/nvptx_arch_empty

// case when nvptx-arch returns nothing or fails
Expand All @@ -31,3 +33,14 @@
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --offload-new-driver --nvptx-arch-tool=%t/nvptx_arch_sm_70 -x cuda --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=ARCH-sm_70
// ARCH-sm_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"

// case when nvptx-arch is used via '-march=native'
// RUN: %clang -### --target=nvptx64-nvidia-cuda -nogpulib -march=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 \
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s --check-prefix=MARCH-sm_70
// MARCH-sm_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"

// case when nvptx-arch reports multiple GPUs via '-march=native': warn and use the first one
// RUN: %clang -### --target=nvptx64-nvidia-cuda -nogpulib -march=native --nvptx-arch-tool=%t/nvptx_arch_sm_89_sm_80 \
// RUN: --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda %s 2>&1 | FileCheck %s --check-prefix=MARCH-sm_89
// MARCH-sm_89: warning: multiple nvptx64 architectures are detected: sm_89, sm_80; only the first one is used for '-march' [-Wmulti-gpu]
// MARCH-sm_89: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_89"

0 comments on commit 82d335e

Please sign in to comment.