[CUDA][SPIRV] Match builtin types and __GCC_ATOMIC_XXX_LOCK_FREE macros on host/device

This change matches the CUDA/SPIRV behavior with CUDA/NVPTX, and makes some builtin types and __GCC_ATOMIC_XXX_LOCK_FREE macros the same between the host and device. This is only done when the host triple is provided and known; otherwise the behavior is unchanged. Reviewed By: yaxunl Differential Revision: https://reviews.llvm.org/D144047
llvm · Feb 22, 2023 · 8bd13ad · 8bd13ad
1 parent 9248b5d
commit 8bd13ad
Show file tree

Hide file tree

Showing 2 changed files with 106 additions and 1 deletion.
diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_CLANG_LIB_BASIC_TARGETS_SPIR_H
 #define LLVM_CLANG_LIB_BASIC_TARGETS_SPIR_H
 
+#include "Targets.h"
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/TargetOptions.h"
 #include "llvm/Support/Compiler.h"
@@ -79,8 +80,10 @@ static const unsigned SPIRDefIsGenMap[] = {
 
 // Base class for SPIR and SPIR-V target info.
 class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo {
+  std::unique_ptr<TargetInfo> HostTarget;
+
 protected:
-  BaseSPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &)
+  BaseSPIRTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
       : TargetInfo(Triple) {
     assert((Triple.isSPIR() || Triple.isSPIRV()) &&
            "Invalid architecture for SPIR or SPIR-V.");
@@ -98,6 +101,52 @@ class LLVM_LIBRARY_VISIBILITY BaseSPIRTargetInfo : public TargetInfo {
     // Define available target features
     // These must be defined in sorted order!
     NoAsmVariants = true;
+
+    llvm::Triple HostTriple(Opts.HostTriple);
+    if (!HostTriple.isSPIR() && !HostTriple.isSPIRV() &&
+        HostTriple.getArch() != llvm::Triple::UnknownArch) {
+      HostTarget.reset(AllocateTarget(llvm::Triple(Opts.HostTriple), Opts));
+
+      // Copy properties from host target.
+      BoolWidth = HostTarget->getBoolWidth();
+      BoolAlign = HostTarget->getBoolAlign();
+      IntWidth = HostTarget->getIntWidth();
+      IntAlign = HostTarget->getIntAlign();
+      HalfWidth = HostTarget->getHalfWidth();
+      HalfAlign = HostTarget->getHalfAlign();
+      FloatWidth = HostTarget->getFloatWidth();
+      FloatAlign = HostTarget->getFloatAlign();
+      DoubleWidth = HostTarget->getDoubleWidth();
+      DoubleAlign = HostTarget->getDoubleAlign();
+      LongWidth = HostTarget->getLongWidth();
+      LongAlign = HostTarget->getLongAlign();
+      LongLongWidth = HostTarget->getLongLongWidth();
+      LongLongAlign = HostTarget->getLongLongAlign();
+      MinGlobalAlign = HostTarget->getMinGlobalAlign(/* TypeSize = */ 0);
+      NewAlign = HostTarget->getNewAlign();
+      DefaultAlignForAttributeAligned =
+          HostTarget->getDefaultAlignForAttributeAligned();
+      IntMaxType = HostTarget->getIntMaxType();
+      WCharType = HostTarget->getWCharType();
+      WIntType = HostTarget->getWIntType();
+      Char16Type = HostTarget->getChar16Type();
+      Char32Type = HostTarget->getChar32Type();
+      Int64Type = HostTarget->getInt64Type();
+      SigAtomicType = HostTarget->getSigAtomicType();
+      ProcessIDType = HostTarget->getProcessIDType();
+
+      UseBitFieldTypeAlignment = HostTarget->useBitFieldTypeAlignment();
+      UseZeroLengthBitfieldAlignment =
+          HostTarget->useZeroLengthBitfieldAlignment();
+      UseExplicitBitFieldAlignment = HostTarget->useExplicitBitFieldAlignment();
+      ZeroLengthBitfieldBoundary = HostTarget->getZeroLengthBitfieldBoundary();
+
+      // This is a bit of a lie, but it controls __GCC_ATOMIC_XXX_LOCK_FREE, and
+      // we need those macros to be identical on host and device, because (among
+      // other things) they affect which standard library classes are defined,
+      // and we need all classes to be defined on both the host and device.
+      MaxAtomicInlineWidth = HostTarget->getMaxAtomicInlineWidth();
+    }
   }
 
 public:

diff --git a/clang/test/CodeGenCUDASPIRV/cuda-types.cu b/clang/test/CodeGenCUDASPIRV/cuda-types.cu
@@ -0,0 +1,56 @@
+// Check that the __CLANG_ATOMIC* macros match on the host and device sides of
+// CUDA compilations. Only macros matching '__CLANG_ATOMIC' are compared, so
+// values that intentionally differ between host and device (long double, the
+// maximum width of _BitInt()) are not part of the comparison.
+//
+// __CLANG_ATOMIC_LLONG_LOCK_FREE is excluded from every comparison. On i386,
+// the default host CPU is typically at least an i586, which has cmpxchg8b
+// (Clang feature, "cx8"): the host value is 2, but the device should get 1.
+//
+// Unlike CUDA, the width of SPIR-V POINTER type could differ between host and
+// device, because SPIR-V explicitly sets POINTER type width. So it is the
+// user's responsibility to choose the offload with the right POINTER size,
+// otherwise the values for __CLANG_ATOMIC_POINTER_LOCK_FREE could be different.
+
+// RUN: mkdir -p %t
+
+// RUN: %clang --cuda-host-only -nocudainc -nocudalib --offload=spirv32 -target i386-unknown-linux-gnu -x cuda -emit-llvm -E -dM -o - /dev/null \
+// RUN:   | grep -E '__CLANG_ATOMIC' \
+// RUN:   | grep -Ev '_ATOMIC_LLONG_LOCK_FREE' > %t/i386-host-defines-filtered
+// RUN: %clang --cuda-device-only -nocudainc -nocudalib --offload=spirv32 -target i386-unknown-linux-gnu -x cuda -emit-llvm -E -dM -o - /dev/null \
+// RUN:   | grep -E '__CLANG_ATOMIC' \
+// RUN:   | grep -Ev '_ATOMIC_LLONG_LOCK_FREE' > %t/i386-device-defines-filtered
+// RUN: diff %t/i386-host-defines-filtered %t/i386-device-defines-filtered
+
+// RUN: %clang --cuda-host-only -nocudainc -nocudalib --offload=spirv32 -target i386-windows-msvc -x cuda -emit-llvm -E -dM -o - /dev/null \
+// RUN:   | grep -E '__CLANG_ATOMIC' \
+// RUN:   | grep -Ev '_ATOMIC_LLONG_LOCK_FREE' > %t/i386-msvc-host-defines-filtered
+// RUN: %clang --cuda-device-only -nocudainc -nocudalib --offload=spirv32 -target i386-windows-msvc -x cuda -emit-llvm -E -dM -o - /dev/null \
+// RUN:   | grep -E '__CLANG_ATOMIC' \
+// RUN:   | grep -Ev '_ATOMIC_LLONG_LOCK_FREE' > %t/i386-msvc-device-defines-filtered
+// RUN: diff %t/i386-msvc-host-defines-filtered %t/i386-msvc-device-defines-filtered
+
+// RUN: %clang --cuda-host-only -nocudainc -nocudalib --offload=spirv64 -target x86_64-unknown-linux-gnu -x cuda -emit-llvm -E -dM -o - /dev/null \
+// RUN:   | grep -E '__CLANG_ATOMIC' \
+// RUN:   | grep -Ev '_ATOMIC_LLONG_LOCK_FREE' > %t/x86_64-host-defines-filtered
+// RUN: %clang --cuda-device-only -nocudainc -nocudalib --offload=spirv64 -target x86_64-unknown-linux-gnu -x cuda -emit-llvm -E -dM -o - /dev/null \
+// RUN:   | grep -E '__CLANG_ATOMIC' \
+// RUN:   | grep -Ev '_ATOMIC_LLONG_LOCK_FREE' > %t/x86_64-device-defines-filtered
+// RUN: diff %t/x86_64-host-defines-filtered %t/x86_64-device-defines-filtered
+
+// RUN: %clang --cuda-host-only -nocudainc -nocudalib --offload=spirv64 -target powerpc64-unknown-linux-gnu -x cuda -emit-llvm -E -dM -o - /dev/null \
+// RUN:   | grep -E '__CLANG_ATOMIC' \
+// RUN:   | grep -Ev '_ATOMIC_LLONG_LOCK_FREE' > %t/powerpc64-host-defines-filtered
+// RUN: %clang --cuda-device-only -nocudainc -nocudalib --offload=spirv64 -target powerpc64-unknown-linux-gnu -x cuda -emit-llvm -E -dM -o - /dev/null \
+// RUN:   | grep -E '__CLANG_ATOMIC' \
+// RUN:   | grep -Ev '_ATOMIC_LLONG_LOCK_FREE' > %t/powerpc64-device-defines-filtered
+// RUN: diff %t/powerpc64-host-defines-filtered %t/powerpc64-device-defines-filtered
+
+// RUN: %clang --cuda-host-only -nocudainc -nocudalib --offload=spirv64 -target x86_64-windows-msvc -x cuda -emit-llvm -E -dM -o - /dev/null \
+// RUN:   | grep -E '__CLANG_ATOMIC' \
+// RUN:   | grep -Ev '_ATOMIC_LLONG_LOCK_FREE' > %t/x86_64-msvc-host-defines-filtered
+// RUN: %clang --cuda-device-only -nocudainc -nocudalib --offload=spirv64 -target x86_64-windows-msvc -x cuda -emit-llvm -E -dM -o - /dev/null \
+// RUN:   | grep -E '__CLANG_ATOMIC' \
+// RUN:   | grep -Ev '_ATOMIC_LLONG_LOCK_FREE' > %t/x86_64-msvc-device-defines-filtered
+// RUN: diff %t/x86_64-msvc-host-defines-filtered %t/x86_64-msvc-device-defines-filtered
+