-
Notifications
You must be signed in to change notification settings - Fork 15k
[Offload] Move HIP and CUDA to new driver by default #84420
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-binary-utilities @llvm/pr-subscribers-clang Author: Joseph Huber (jhuber6) Changes
Patch is 115.28 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/84420.diff 50 Files Affected:
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 916cb4b7ef34a7..3e77a74c7c0092 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -123,7 +123,7 @@ enum class CudaArch {
LAST,
CudaDefault = CudaArch::SM_52,
- HIPDefault = CudaArch::GFX803,
+ HIPDefault = CudaArch::GFX906,
};
static inline bool IsNVIDIAGpuArch(CudaArch A) {
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index 96e6ad77f5e50d..e85a3e675408e4 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -3373,7 +3373,7 @@ class OffloadingActionBuilder final {
const Driver::InputList &Inputs)
: CudaActionBuilderBase(C, Args, Inputs, Action::OFK_HIP) {
- DefaultCudaArch = CudaArch::GFX906;
+ DefaultCudaArch = CudaArch::HIPDefault;
if (Args.hasArg(options::OPT_fhip_emit_relocatable,
options::OPT_fno_hip_emit_relocatable)) {
@@ -4115,9 +4115,9 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args,
handleArguments(C, Args, Inputs, Actions);
bool UseNewOffloadingDriver =
- C.isOffloadingHostKind(Action::OFK_OpenMP) ||
+ C.getActiveOffloadKinds() != Action::OFK_None &&
Args.hasFlag(options::OPT_offload_new_driver,
- options::OPT_no_offload_new_driver, false);
+ options::OPT_no_offload_new_driver, true);
// Builder to be used to build offloading actions.
std::unique_ptr<OffloadingActionBuilder> OffloadBuilder =
@@ -4802,7 +4802,7 @@ Action *Driver::ConstructPhaseAction(
offloadDeviceOnly() ||
(TargetDeviceOffloadKind == Action::OFK_HIP &&
!Args.hasFlag(options::OPT_offload_new_driver,
- options::OPT_no_offload_new_driver, false)))
+ options::OPT_no_offload_new_driver, true)))
? types::TY_LLVM_IR
: types::TY_LLVM_BC;
return C.MakeAction<BackendJobAction>(Input, Output);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index fa17f6295d6ea7..9605fb28e5fe34 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4687,8 +4687,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
bool IsHostOffloadingAction =
JA.isHostOffloading(Action::OFK_OpenMP) ||
(JA.isHostOffloading(C.getActiveOffloadKinds()) &&
+ C.getActiveOffloadKinds() != Action::OFK_None &&
Args.hasFlag(options::OPT_offload_new_driver,
- options::OPT_no_offload_new_driver, false));
+ options::OPT_no_offload_new_driver, true));
bool IsRDCMode =
Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false);
@@ -4997,7 +4998,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
if (IsUsingLTO) {
if (IsDeviceOffloadAction && !JA.isDeviceOffloading(Action::OFK_OpenMP) &&
!Args.hasFlag(options::OPT_offload_new_driver,
- options::OPT_no_offload_new_driver, false) &&
+ options::OPT_no_offload_new_driver, true) &&
!Triple.isAMDGPU()) {
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Args.getLastArg(options::OPT_foffload_lto,
@@ -6521,8 +6522,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
}
// Forward the new driver to change offloading code generation.
- if (Args.hasFlag(options::OPT_offload_new_driver,
- options::OPT_no_offload_new_driver, false))
+ if (C.getActiveOffloadKinds() != Action::OFK_None &&
+ Args.hasFlag(options::OPT_offload_new_driver,
+ options::OPT_no_offload_new_driver, true))
CmdArgs.push_back("--offload-new-driver");
SanitizeArgs.addArgs(TC, Args, CmdArgs, InputType);
diff --git a/clang/test/Driver/cl-offload.cu b/clang/test/Driver/cl-offload.cu
index b05bf3b97b7eb7..8f1200f1733597 100644
--- a/clang/test/Driver/cl-offload.cu
+++ b/clang/test/Driver/cl-offload.cu
@@ -18,11 +18,10 @@
// CUDA-SAME: "-Weverything"
// CUDA: link
-// HIP: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "amdgcn-amd-amdhsa"
-// HIP-SAME: "-Weverything"
// HIP: "-cc1" "-triple" "amdgcn-amd-amdhsa" "-aux-triple" "x86_64-pc-windows-msvc"
// HIP-SAME: "-Weverything"
-// HIP: {{lld.* "-flavor" "gnu" "-m" "elf64_amdgpu"}}
+// HIP: "-cc1" "-triple" "x86_64-pc-windows-msvc{{.*}}" "-aux-triple" "amdgcn-amd-amdhsa"
+// HIP-SAME: "-Weverything"
// HIP: {{link.* "amdhip64.lib"}}
// CMake uses this option when finding packages for HIP, so
diff --git a/clang/test/Driver/cuda-arch-translation.cu b/clang/test/Driver/cuda-arch-translation.cu
index f37964d87c66a1..757defec959c47 100644
--- a/clang/test/Driver/cuda-arch-translation.cu
+++ b/clang/test/Driver/cuda-arch-translation.cu
@@ -69,19 +69,19 @@
// HIP: clang-offload-bundler
-// SM20:--image=profile=sm_20{{.*}}--image=profile=compute_20
-// SM21:--image=profile=sm_21{{.*}}--image=profile=compute_20
-// SM30:--image=profile=sm_30{{.*}}--image=profile=compute_30
-// SM32:--image=profile=sm_32{{.*}}--image=profile=compute_32
-// SM35:--image=profile=sm_35{{.*}}--image=profile=compute_35
-// SM37:--image=profile=sm_37{{.*}}--image=profile=compute_37
-// SM50:--image=profile=sm_50{{.*}}--image=profile=compute_50
-// SM52:--image=profile=sm_52{{.*}}--image=profile=compute_52
-// SM53:--image=profile=sm_53{{.*}}--image=profile=compute_53
-// SM60:--image=profile=sm_60{{.*}}--image=profile=compute_60
-// SM61:--image=profile=sm_61{{.*}}--image=profile=compute_61
-// SM62:--image=profile=sm_62{{.*}}--image=profile=compute_62
-// SM70:--image=profile=sm_70{{.*}}--image=profile=compute_70
+// SM20:--image=profile=sm_20{{.*}}
+// SM21:--image=profile=sm_21{{.*}}
+// SM30:--image=profile=sm_30{{.*}}
+// SM32:--image=profile=sm_32{{.*}}
+// SM35:--image=profile=sm_35{{.*}}
+// SM37:--image=profile=sm_37{{.*}}
+// SM50:--image=profile=sm_50{{.*}}
+// SM52:--image=profile=sm_52{{.*}}
+// SM53:--image=profile=sm_53{{.*}}
+// SM60:--image=profile=sm_60{{.*}}
+// SM61:--image=profile=sm_61{{.*}}
+// SM62:--image=profile=sm_62{{.*}}
+// SM70:--image=profile=sm_70{{.*}}
// GFX600:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx600
// GFX601:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx601
// GFX602:-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx602
diff --git a/clang/test/Driver/cuda-bad-arch.cu b/clang/test/Driver/cuda-bad-arch.cu
index 877b20bc9351bc..35a56a8bef0f0d 100644
--- a/clang/test/Driver/cuda-bad-arch.cu
+++ b/clang/test/Driver/cuda-bad-arch.cu
@@ -30,9 +30,9 @@
// RUN: | FileCheck -check-prefix OK %s
// We don't allow using NVPTX/AMDGCN for host compilation.
-// RUN: not %clang -### --cuda-host-only --target=nvptx-nvidia-cuda -nogpulib -nogpuinc -c %s 2>&1 \
+// RUN: not %clang -### --no-offload-new-driver --cuda-host-only --target=nvptx-nvidia-cuda -nogpulib -nogpuinc -c %s 2>&1 \
// RUN: | FileCheck -check-prefix HOST_NVPTX %s
-// RUN: not %clang -### --cuda-host-only --target=amdgcn-amd-amdhsa -nogpulib -nogpuinc -c %s 2>&1 \
+// RUN: not %clang -### --no-offload-new-driver --cuda-host-only --target=amdgcn-amd-amdhsa -nogpulib -nogpuinc -c %s 2>&1 \
// RUN: | FileCheck -check-prefix HOST_AMDGCN %s
// OK-NOT: error: Unsupported CUDA gpu architecture
diff --git a/clang/test/Driver/cuda-bindings.cu b/clang/test/Driver/cuda-bindings.cu
index e130e08f20152a..3b95aa4bd50541 100644
--- a/clang/test/Driver/cuda-bindings.cu
+++ b/clang/test/Driver/cuda-bindings.cu
@@ -26,14 +26,14 @@
// BIN-NOT: cuda-bindings-device-cuda-nvptx64
// BIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
// BIN-NOT: cuda-bindings-device-cuda-nvptx64
-// BIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
+// BIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
//
// Test single gpu architecture up to the assemble phase.
//
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --cuda-gpu-arch=sm_30 %s -S 2>&1 \
// RUN: | FileCheck -check-prefix=ASM %s
-// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
+// ASM-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[BINDINGS:.+.s]]"
// ASM-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
//
@@ -64,8 +64,8 @@
// BIN2-NOT: cuda-bindings-device-cuda-nvptx64
// BIN2: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
// BIN2-NOT: cuda-bindings-device-cuda-nvptx64
-// AOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
-// TOUT: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "{{.*}}/out"
+// AOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
+// TOUT: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "{{.*}}/out"
// .. same, but with -fsyntax-only
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings -fsyntax-only \
@@ -81,9 +81,9 @@
// RUN: --offload-arch=sm_30,sm_35 %s -o %t/out 2>&1 \
// RUN: | FileCheck -check-prefix=SYN %s
// SYN-NOT: inputs:
-// SYN: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
-// SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
+// SYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
// SYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
+// SYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
// SYN-NOT: inputs
// .. and with --offload-new-driver
@@ -100,7 +100,7 @@
// RUN: --offload-arch=sm_30,sm_35 %s --offload-new-driver -o %t/out 2>&1 \
// RUN: | FileCheck -check-prefix=NDSYN %s
// NDSYN-NOT: inputs:
-// NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
+// NDSYN: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
// NDSYN-NEXT: # "nvptx64-nvidia-cuda" - "clang", inputs: [{{.*}}], output: (nothing)
// NDSYN-NEXT: # "powerpc64le-ibm-linux-gnu" - "clang", inputs: [{{.*}}], output: (nothing)
// NDSYN-NOT: inputs:
@@ -112,8 +112,8 @@
// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s -S 2>&1 \
// RUN: | FileCheck -check-prefix=ASM2 %s
-// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
-// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_35.s"
+// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM30:.+.s]]"
+// ASM2-DAG: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "[[SM35:.+.s]]"
// ASM2-DAG: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output: "cuda-bindings.s"
//
@@ -128,7 +128,7 @@
// RUN: | FileCheck -check-prefix=HBIN %s
// HBIN: # "powerpc64le-ibm-linux-gnu" - "clang",{{.*}} output:
// HBIN-NOT: cuda-bindings-device-cuda-nvptx64
-// HBIN: # "powerpc64le-ibm-linux-gnu" - "GNU::Linker", inputs:{{.*}}, output: "a.out"
+// HBIN: # "powerpc64le-ibm-linux-gnu" - "Offload::Linker", inputs:{{.*}}, output: "a.out"
//
// Test one or more gpu architecture up to the assemble phase in host-only
@@ -166,7 +166,7 @@
// Test two gpu architectures with complete compilation in device-only
// compilation mode.
//
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only 2>&1 \
// RUN: | FileCheck -check-prefix=DBIN2 %s
// DBIN2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output:
@@ -180,7 +180,7 @@
// Test two gpu architectures up to the assemble phase in device-only
// compilation mode.
//
-// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings \
+// RUN: %clang -target powerpc64le-ibm-linux-gnu -ccc-print-bindings --no-offload-new-driver \
// RUN: --cuda-gpu-arch=sm_30 --cuda-gpu-arch=sm_35 %s --cuda-device-only -S 2>&1 \
// RUN: | FileCheck -check-prefix=DASM2 %s
// DASM2: # "nvptx64-nvidia-cuda" - "clang",{{.*}} output: "cuda-bindings-cuda-nvptx64-nvidia-cuda-sm_30.s"
diff --git a/clang/test/Driver/cuda-external-tools.cu b/clang/test/Driver/cuda-external-tools.cu
index 1aa87cc09982c6..946e144fce38fb 100644
--- a/clang/test/Driver/cuda-external-tools.cu
+++ b/clang/test/Driver/cuda-external-tools.cu
@@ -25,7 +25,7 @@
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT3 %s
// Generating relocatable device code
// RUN: %clang -### --target=x86_64-linux-gnu -fgpu-rdc -c %s 2>&1 \
-// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
// With debugging enabled, ptxas should be run with with no ptxas optimizations.
@@ -59,7 +59,7 @@
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35 %s
// Separate compilation targeting sm_35.
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
-// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
// 32-bit compile.
@@ -68,7 +68,7 @@
// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35 %s
// 32-bit compile when generating relocatable device code.
// RUN: %clang -### --target=i386-linux-gnu -fgpu-rdc -c %s 2>&1 \
-// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s
// Compile with -fintegrated-as. This should still cause us to invoke ptxas.
@@ -77,7 +77,7 @@
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,OPT0 %s
// Check that we still pass -c when generating relocatable device code.
// RUN: %clang -### --target=x86_64-linux-gnu -fintegrated-as -fgpu-rdc -c %s 2>&1 \
-// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
// Check -Xcuda-ptxas and -Xcuda-fatbinary
@@ -99,13 +99,13 @@
// Check relocatable device code generation on MacOS.
// RUN: %clang -### --target=x86_64-apple-macosx -O0 -fgpu-rdc -c %s 2>&1 \
-// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
// RUN: %clang -### --target=x86_64-apple-macosx --cuda-gpu-arch=sm_35 -fgpu-rdc -c %s 2>&1 \
-// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH64,SM35,RDC %s
// RUN: %clang -### --target=i386-apple-macosx -fgpu-rdc -c %s 2>&1 \
-// RUN: --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: --no-offload-new-driver --offload-arch=sm_35 --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
// RUN: | FileCheck -check-prefixes=CHECK,ARCH32,SM35,RDC %s
// Check that CLANG forwards the -v flag to PTXAS.
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
index 8999a6618fe1fa..3aef694b56f496 100644
--- a/clang/test/Driver/cuda-options.cu
+++ b/clang/test/Driver/cuda-options.cu
@@ -4,13 +4,13 @@
// Simple compilation case. Compile device-side to PTX assembly and make sure
// we use it on the host side.
-// RUN: %clang -### -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -c -nogpulib -nogpuinc %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
// RUN: -check-prefix NOLINK %s
// Typical compilation + link case.
-// RUN: %clang -### -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -target x86_64-linux-gnu -nogpulib -nogpuinc %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
// RUN: -check-prefix LINK %s
@@ -35,7 +35,7 @@
// RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-compile-host-device \
-// RUN: --cuda-host-only -nogpulib -nogpuinc %s 2>&1 \
+// RUN: --cuda-host-only --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck -check-prefix NODEVICE -check-prefix HOST \
// RUN: -check-prefix NOINCLUDES-DEVICE -check-prefix LINK %s
@@ -50,27 +50,27 @@
// RUN: -check-prefix NOHOST -check-prefix NOLINK %s
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-host-only \
-// RUN: -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
+// RUN: --cuda-include-ptx=all -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
// RUN: -check-prefix LINK %s
// RUN: %clang -### --target=x86_64-linux-gnu --cuda-device-only \
-// RUN: -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
+// RUN: --cuda-include-ptx=all -nogpulib -nogpuinc --cuda-compile-host-device %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN: -check-prefix HOST -check-prefix INCLUDES-DEVICE \
// RUN: -check-prefix LINK %s
// Verify that --cuda-gpu-arch option passes the correct GPU architecture to
// device compilation.
-// RUN: %clang -### -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
+// RUN: %clang -### --cuda-include-ptx=all -nogpulib -nogpuinc --target=x86_64-linux-gnu --cuda-gpu-arch=sm_52 -c %s 2>&1 \
// RUN: | FileCheck -check-prefix DEVICE -check-prefix DEVICE-NOSAVE \
// RUN: -check-prefix DEVICE-SM52 -check-prefix HOST \
// RUN: -check-prefix INCLUDES-DEVICE -check-prefix NOLINK %s
// Verify that there is one device-side compilation per --cuda-gpu-arch args
// and that all results are included on the host side.
-// RUN: %clang -### --target=x86_64-linux-gnu \
+// RUN: %clang -### --target=x86_64-linux-gnu --cuda-include-ptx=all \
// RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 -c %s 2>&1 \
// RUN: | FileCheck -check-prefixes DEVICE,DEVICE-NOSAVE,DEVICE2 \
// RUN: -check-prefixes DEVICE-SM52,DEVICE2-SM60 \
@@ -130,9 +130,9 @@
// f) --no-cuda-gpu-arch=all negates all preceding --cuda-gpu-arch=X
// RUN: %clang -### -target x86_64-linux-gnu --cuda-device-only \
// RUN: -nogpulib -nogpuinc --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
-// RUN: --no-cuda-gpu-arch=all \
+// RUN: --no-cuda-version-check --no-cuda-gpu-arch=all \
// RUN: --cuda-gpu-arch=sm_70 \
-// RUN: -c -nogpulib -nogpuinc %s 2>&1 \
+// RUN: -c --cuda-path=%S/Inputs/CUDA/usr/local/cuda %s 2>&1 \
// RUN: | FileCheck -check-prefixes NOARCH-SM52,NOARCH-SM60,ARCH-SM70 %s
// g) There's no --cuda-gpu-arch=all
@@ -143,7 +143,7 @@
// Verify that --[no-]cuda-include-ptx arguments are handled correctly.
-// a) by default we're including PTX for all GPUs.
+// a) by default we're not including PTX for all GPUs.
// RUN: %clang -### --target=x86_64-linux-gnu -nogpulib -nogpuinc \
// RUN: --cuda-gpu-arch=sm_60 --cuda-gpu-arch=sm_52 \
// RUN: -c %s 2>&1 \
@@ -185,7 +185,8 @@
// Verify -flto=thin -fwhole-program-vtables handling. This should result in
// both options being passed to the host compilation, with neither passed to
// the device compilation.
-// RUN: %clang -...
[truncated]
|
There are many |
I think it's much easier to keep these in a single patch, both for writing and with any potential reverting that it may require. |
I could possibly precommit the basic checks for the ones that just check the phases / bindings. |
Since llvm is moving rapidly, reverting a commit that modifies many tests is quite risky, since these tests could have been modified by other changes or just cause merge conflicts for others. I assume that the merit of this patch is obvious (a new feature is in development for quite some time, then it matures and replaces the old feature), so adding If you think showing the difference is important, you can add to the description the test commits. |
I'll try to pull out the files that are purely keeping the old behavior, since it'll be a no-op without this patch. |
6cefad7
to
3e2846b
Compare
Done |
3e2846b
to
677e374
Compare
Hi @jhuber6! Thank you for the pull request!
Do you mean the SPIR-V target (backend)? I have not followed this area of work closely. What is missing or what exactly needs to be supported by the SPIR-V target? Any help or pointers would be greatly appreciated! |
I believe there was some work to port SYCL to work with the new driver, however I don't know that status of that. However what I need from W.r.t. the |
Hi @jhuber6 For porting SYCL to work with the new driver, We have started making changes in the intel/llvm repository. But they have not been upstreamed yet. We can make sure to align with changes being made here when that happens. I will let @michalpaszkowski answer if the SPIR-V backend that exists today can do what you expected. i.e clang --target=spriv-something-something foo.c to spit out some valid SPIR-V. Thanks |
3b5c311
to
778fff6
Compare
778fff6
to
1d8acac
Compare
@yxsamliu Do you know what the next steps for merging this would be? I'd like to get it into the Clang 20 release if possible. The only thing this loses currently is managed variables being registered in RDC mode, but I'm going to assume that's hardly seen in practice so I could probably punt that until later. I unfortunately haven't figured out a way to reproduce the build failures on rocBLAS that the fork saw. I think @saiislam was looking into that but couldn't get docker to work. |
I think at least we need to get this PR pass internal PSDB. We could discuss the docker issues internally. |
1d8acac
to
ac95806
Compare
ac95806
to
8d63e56
Compare
8d63e56
to
2384f1b
Compare
Summary: This patch updates the `--offload-new-driver` flag to be default for all current offloading languages. This mostly just required updating a lot of tests to use the old format. I tried to update them where possible, but some were directly checking the old format. This is not intended to be landed immediately, but to allow for greater testing. One potential issue I've discovered is the lack of SPIR-V support or handling for `--offload`.
2384f1b
to
90a6145
Compare
This patch updates the
--offload-new-driver
flag to be default for allcurrent offloading languages. This mostly just required updating a lot
of tests to use the old format. I tried to update them where possible,
but some were directly checking the old format.
This is not intended to be landed immediately, but to allow for greater
testing. One potential issue I've discovered is the lack of SPIR-V
support or handling for
--offload
.