9 changes: 3 additions & 6 deletions clang/lib/Driver/ToolChains/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,10 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF {
/// Should skip argument.
bool shouldSkipArgument(const llvm::opt::Arg *Arg) const;

/// Uses amdgpu_arch tool to get arch of the system GPU. Will return error
/// Uses amdgpu-arch tool to get arch of the system GPU. Will return error
/// if unable to find one.
llvm::Error getSystemGPUArch(const llvm::opt::ArgList &Args,
std::string &GPUArch) const;

llvm::Error detectSystemGPUs(const llvm::opt::ArgList &Args,
SmallVector<std::string, 1> &GPUArchs) const;
Expected<SmallVector<std::string>>
getSystemGPUArchs(const llvm::opt::ArgList &Args) const override;

protected:
/// Check and diagnose invalid target ID specified by -mcpu.
Expand Down
17 changes: 14 additions & 3 deletions clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,24 @@ namespace {

/// Picks the GPU architecture to compile for from the GPUs reported by \p TC.
///
/// On success the detected architecture is written to \p GPUArch and true is
/// returned. When detection fails, or the reported GPUs do not all share one
/// architecture, a driver diagnostic is emitted and false is returned.
// NOTE(review): this text comes from a diff view, so replaced lines (e.g. the
// getSystemGPUArch call) appear next to their replacements - confirm against
// the real file before relying on it.
static bool checkSystemForAMDGPU(const ArgList &Args, const AMDGPUToolChain &TC,
std::string &GPUArch) {
if (auto Err = TC.getSystemGPUArch(Args, GPUArch)) {
// Consumes the error, reports it through the driver and yields false so the
// caller can simply `return CheckError(...)`.
auto CheckError = [&](llvm::Error Err) -> bool {
std::string ErrMsg =
llvm::formatv("{0}", llvm::fmt_consume(std::move(Err)));
TC.getDriver().Diag(diag::err_drv_undetermined_amdgpu_arch) << ErrMsg;
TC.getDriver().Diag(diag::err_drv_undetermined_gpu_arch)
<< llvm::Triple::getArchTypeName(TC.getArch()) << ErrMsg << "-march";
return false;
}
};

auto ArchsOrErr = TC.getSystemGPUArchs(Args);
if (!ArchsOrErr)
return CheckError(ArchsOrErr.takeError());

// Several GPUs are only acceptable when every one reports the same arch.
if (ArchsOrErr->size() > 1)
if (!llvm::all_equal(*ArchsOrErr))
return CheckError(llvm::createStringError(
std::error_code(), "Multiple AMD GPUs found with different archs"));

// All reported archs are identical at this point, so the first one is used.
GPUArch = ArchsOrErr->front();
return true;
}
} // namespace
Expand Down
25 changes: 25 additions & 0 deletions clang/lib/Driver/ToolChains/Cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -785,6 +785,31 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
return DAL;
}

/// Detects the NVIDIA GPUs available on the system.
///
/// Runs the nvptx-arch tool (or the program named by the nvptx-arch-tool
/// option, when given) and treats every non-empty line of its output as one
/// architecture name. Returns an error if running the tool fails or if it
/// reports no architecture at all.
Expected<SmallVector<std::string>>
CudaToolChain::getSystemGPUArchs(const ArgList &Args) const {
  // An explicitly requested tool takes precedence over the default lookup.
  Arg *ToolArg = Args.getLastArg(options::OPT_nvptx_arch_tool_EQ);
  std::string Program =
      ToolArg ? std::string(ToolArg->getValue()) : GetProgramPath("nvptx-arch");

  auto ToolOutput = executeToolChainProgram(Program);
  if (!ToolOutput)
    return ToolOutput.takeError();

  // One architecture per output line; blank lines carry no information.
  SmallVector<std::string, 1> DetectedArchs;
  StringRef Buffer = (*ToolOutput)->getBuffer();
  for (StringRef Line : llvm::split(Buffer, "\n")) {
    if (Line.empty())
      continue;
    DetectedArchs.push_back(Line.str());
  }

  if (DetectedArchs.empty())
    return llvm::createStringError(std::error_code(),
                                   "No NVIDIA GPU detected in the system");

  return DetectedArchs;
}

Tool *CudaToolChain::buildAssembler() const {
return new tools::NVPTX::Assembler(*this);
}
Expand Down
5 changes: 5 additions & 0 deletions clang/lib/Driver/ToolChains/Cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,11 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public ToolChain {
const ToolChain &HostTC;
CudaInstallationDetector CudaInstallation;

/// Uses the nvptx-arch tool to get the archs of the GPUs present on the
/// system. Returns an error if running the tool fails or it reports no GPU.
Expected<SmallVector<std::string>>
getSystemGPUArchs(const llvm::opt::ArgList &Args) const override;

protected:
Tool *buildAssembler() const override; // ptxas
Tool *buildLinker() const override; // fatbinary (ok, not really a linker)
Expand Down
2 changes: 2 additions & 0 deletions clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_fail
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/sh
# Fake nvptx-arch used by the driver tests: prints nothing and reports failure.
exit 1
3 changes: 3 additions & 0 deletions clang/test/Driver/Inputs/nvptx-arch/nvptx_arch_sm_70
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/sh
# Fake nvptx-arch used by the driver tests: reports a single sm_70 GPU and
# exits successfully.
echo sm_70
exit 0
33 changes: 33 additions & 0 deletions clang/test/Driver/amdgpu-hip-system-arch.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// REQUIRES: system-linux
// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target
// REQUIRES: shell

// RUN: mkdir -p %t
// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_fail %t/
// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/
// RUN: echo '#!/bin/sh' > %t/amdgpu_arch_empty
// RUN: chmod +x %t/amdgpu_arch_fail
// RUN: chmod +x %t/amdgpu_arch_gfx906
// RUN: chmod +x %t/amdgpu_arch_empty

// case when amdgpu-arch returns nothing or fails
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_fail -x hip %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_fail -x hip %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
// NO-OUTPUT-ERROR: error: cannot determine amdgcn architecture{{.*}}; consider passing it via '--offload-arch'

// case when amdgpu-arch does not return anything with successful execution
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_empty -x hip %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_empty -x hip %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT
// EMPTY-OUTPUT: error: cannot determine amdgcn architecture: No AMD GPU detected in the system; consider passing it via '--offload-arch'

// case when amdgpu-arch returns a gfx906 GPU.
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 -x hip %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=ARCH-GFX906
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 -x hip %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=ARCH-GFX906
// ARCH-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"
6 changes: 3 additions & 3 deletions clang/test/Driver/amdgpu-openmp-system-arch-fail.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@
// case when amdgpu_arch returns nothing or fails
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
// NO-OUTPUT-ERROR: error: cannot determine AMDGPU architecture{{.*}}Exited with error code 1; consider passing it via '--march'
// NO-OUTPUT-ERROR: error: cannot determine amdgcn architecture{{.*}}; consider passing it via '-march'

// case when amdgpu_arch returns multiple gpus but all are different
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_different %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=MULTIPLE-OUTPUT-ERROR
// MULTIPLE-OUTPUT-ERROR: error: cannot determine AMDGPU architecture: Multiple AMD GPUs found with different archs; consider passing it via '--march'
// MULTIPLE-OUTPUT-ERROR: error: cannot determine amdgcn architecture: Multiple AMD GPUs found with different archs; consider passing it via '-march'

// case when amdgpu_arch does not return anything with successful execution
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fopenmp=libomp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT
// EMPTY-OUTPUT: error: cannot determine AMDGPU architecture: No AMD GPU detected in the system; consider passing it via '--march'
// EMPTY-OUTPUT: error: cannot determine amdgcn architecture: No AMD GPU detected in the system; consider passing it via '-march'
33 changes: 33 additions & 0 deletions clang/test/Driver/nvptx-cuda-system-arch.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// REQUIRES: system-linux
// REQUIRES: x86-registered-target
// REQUIRES: nvptx-registered-target
// REQUIRES: shell

// RUN: mkdir -p %t
// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_fail %t/
// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_70 %t/
// RUN: echo '#!/bin/sh' > %t/nvptx_arch_empty
// RUN: chmod +x %t/nvptx_arch_fail
// RUN: chmod +x %t/nvptx_arch_sm_70
// RUN: chmod +x %t/nvptx_arch_empty

// case when nvptx-arch returns nothing or fails
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_fail -x cuda %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_fail -x cuda %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
// NO-OUTPUT-ERROR: error: cannot determine nvptx64 architecture{{.*}}; consider passing it via '--offload-arch'

// case when nvptx-arch does not return anything with successful execution
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_empty -x cuda %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-new-driver --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_empty -x cuda %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=EMPTY-OUTPUT
// EMPTY-OUTPUT: error: cannot determine nvptx64 architecture: No NVIDIA GPU detected in the system; consider passing it via '--offload-arch'

// case when nvptx-arch returns an sm_70 GPU.
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --nvptx-arch-tool=%t/nvptx_arch_sm_70 -x cuda %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=ARCH-sm_70
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib --offload-arch=native --offload-new-driver --nvptx-arch-tool=%t/nvptx_arch_sm_70 -x cuda %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=ARCH-sm_70
// ARCH-sm_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
4 changes: 3 additions & 1 deletion clang/test/Driver/openmp-offload-infer.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,11 @@

// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp \
// RUN: --offload-arch=sm_70 --offload-arch=gfx908 --offload-arch=native \
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp \
// RUN: --offload-arch=sm_70 --offload-arch=gfx908 --offload-arch=skylake \
// RUN: -nogpulib %s 2>&1 | FileCheck %s --check-prefix=CHECK-FAILED

// CHECK-FAILED: error: failed to deduce triple for target architecture 'native'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead.
// CHECK-FAILED: error: failed to deduce triple for target architecture 'skylake'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead.

// RUN: %clang -### --target=x86_64-unknown-linux-gnu -ccc-print-bindings -fopenmp=libomp \
// RUN: --offload-arch=sm_70 --offload-arch=gfx908 -fno-openmp \
Expand Down
56 changes: 56 additions & 0 deletions clang/test/Driver/openmp-system-arch.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// RUN: mkdir -p %t
// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_fail %t/
// RUN: cp %S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %t/
// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_fail %t/
// RUN: cp %S/Inputs/nvptx-arch/nvptx_arch_sm_70 %t/
// RUN: echo '#!/bin/sh' > %t/amdgpu_arch_empty
// RUN: chmod +x %t/amdgpu_arch_fail
// RUN: chmod +x %t/amdgpu_arch_gfx906
// RUN: chmod +x %t/amdgpu_arch_empty
// RUN: echo '#!/bin/sh' > %t/nvptx_arch_empty
// RUN: chmod +x %t/nvptx_arch_fail
// RUN: chmod +x %t/nvptx_arch_sm_70
// RUN: chmod +x %t/nvptx_arch_empty

// case when nvptx-arch and amdgpu-arch return nothing or fail
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \
// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \
// RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch= \
// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch= \
// RUN: --nvptx-arch-tool=%t/nvptx_arch_empty --amdgpu-arch-tool=%t/amdgpu_arch_empty %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=NO-OUTPUT-ERROR
// NO-OUTPUT-ERROR: error: failed to deduce triple for target architecture 'native'; specify the triple using '-fopenmp-targets' and '-Xopenmp-target' instead.

// case when amdgpu-arch succeeds.
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \
// RUN: --nvptx-arch-tool=%t/nvptx_arch_fail --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=ARCH-GFX906
// ARCH-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"

// case when nvptx-arch succeeds.
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \
// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_fail %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=ARCH-SM_70
// ARCH-SM_70: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"

// case when both nvptx-arch and amdgpu-arch succeed.
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native \
// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=ARCH-SM_70-GFX906
// ARCH-SM_70-GFX906: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"
// ARCH-SM_70-GFX906: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"

// case when both nvptx-arch and amdgpu-arch succeed with other archs.
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -nogpulib -fopenmp --offload-arch=native,sm_75,gfx1030 \
// RUN: --nvptx-arch-tool=%t/nvptx_arch_sm_70 --amdgpu-arch-tool=%t/amdgpu_arch_gfx906 %s 2>&1 \
// RUN: | FileCheck %s --check-prefix=ARCH-MULTIPLE
// ARCH-MULTIPLE: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx1030"
// ARCH-MULTIPLE: "-cc1" "-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "gfx906"
// ARCH-MULTIPLE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_70"
// ARCH-MULTIPLE: "-cc1" "-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "sm_75"