From f59c80a4fd9b28166810ffbb8dbd2def10054512 Mon Sep 17 00:00:00 2001 From: Leo Fang Date: Thu, 22 Feb 2024 18:40:21 -0800 Subject: [PATCH] fix CI error --- cupy/cuda/cupy_cub.cu | 69 +++++++++++++++++++++++++++---- install/cupy_builder/_compiler.py | 8 ++-- 2 files changed, 63 insertions(+), 14 deletions(-) diff --git a/cupy/cuda/cupy_cub.cu b/cupy/cuda/cupy_cub.cu index 5495ad36312..802d62bd59d 100644 --- a/cupy/cuda/cupy_cub.cu +++ b/cupy/cuda/cupy_cub.cu @@ -28,6 +28,7 @@ // numbers as in general the comparison is ill defined. // - DO NOT USE THIS STUB for supporting CUB sorting!!!!!! using namespace cub; +#define CUPY_CUB_NAMESPACE cub template <> struct FpLimits> @@ -99,6 +100,7 @@ class numeric_limits<__half> { // hipCUB internally uses std::numeric_limits, so we should provide specializations for the complex numbers. // Note that there's std::complex, so to avoid name collision we must use the full decoration (thrust::complex)! // TODO(leofang): wrap CuPy's thrust namespace with another one (say, cupy::thrust) for safer scope resolution? +#define CUPY_CUB_NAMESPACE hipcub namespace std { template <> @@ -111,6 +113,12 @@ class numeric_limits> { static __host__ __device__ thrust::complex lowest() noexcept { return thrust::complex(-std::numeric_limits::max(), -std::numeric_limits::max()); } + + static __host__ __device__ thrust::complex infinity() noexcept { + return thrust::complex(std::numeric_limits::infinity(), std::numeric_limits::infinity()); + } + + static constexpr bool has_infinity = true; }; template <> @@ -123,6 +131,12 @@ class numeric_limits> { static __host__ __device__ thrust::complex lowest() noexcept { return thrust::complex(-std::numeric_limits::max(), -std::numeric_limits::max()); } + + static __host__ __device__ thrust::complex infinity() noexcept { + return thrust::complex(std::numeric_limits::infinity(), std::numeric_limits::infinity()); + } + + static constexpr bool has_infinity = true; }; // Copied from https://github.com/ROCmSoftwarePlatform/hipCUB/blob/master-rocm-3.5/hipcub/include/hipcub/backend/rocprim/device/device_reduce.hpp @@ -142,12 +156,27 @@ class numeric_limits<__half> { __half lowest_value = *reinterpret_cast<__half*>(&lowest_half); return lowest_value; } + + static __host__ __device__ __half infinity() noexcept { + unsigned short inf_half = 0x7C00U; + __half inf_value = *reinterpret_cast<__half*>(&inf_half); + return inf_value; + } + + static constexpr bool has_infinity = true; }; } // namespace std using namespace hipcub; #endif // ifndef CUPY_USE_HIP + +__host__ __device__ __half half_negate_inf() { + unsigned short minf_half = 0xFC00U; + __half* minf_value = reinterpret_cast<__half*>(&minf_half); + return *minf_value; +} + /* ------------------------------------ end of boilerplate ------------------------------------ */ @@ -711,7 +740,7 @@ struct _cub_reduce_min { { DeviceReduce::Reduce(workspace, workspace_size, static_cast(x), static_cast(y), num_items, - cub::Min(), std::numeric_limits::infinity(), s); + CUPY_CUB_NAMESPACE::Min(), std::numeric_limits::infinity(), s); } else { @@ -731,7 +760,7 @@ struct _cub_segmented_reduce_min { DeviceSegmentedReduce::Reduce(workspace, workspace_size, static_cast(x), static_cast(y), num_segments, offset_start, offset_start+1, - cub::Min(), std::numeric_limits::infinity(), s); + CUPY_CUB_NAMESPACE::Min(), std::numeric_limits::infinity(), s); } else { @@ -752,9 +781,20 @@ struct _cub_reduce_max { { if constexpr (std::numeric_limits::has_infinity) { - DeviceReduce::Reduce(workspace, workspace_size, static_cast(x), - static_cast(y), num_items, - cub::Max(), -std::numeric_limits::infinity(), s); + // to avoid compiler error: invalid argument type '__half' to unary expression on HIP... + if constexpr (std::is_same_v) + { + DeviceReduce::Reduce(workspace, workspace_size, static_cast(x), + static_cast(y), num_items, + CUPY_CUB_NAMESPACE::Max(), half_negate_inf(), s); + } + else + { + DeviceReduce::Reduce(workspace, workspace_size, static_cast(x), + static_cast(y), num_items, + CUPY_CUB_NAMESPACE::Max(), -std::numeric_limits::infinity(), s); + + } } else { @@ -771,10 +811,21 @@ struct _cub_segmented_reduce_max { { if constexpr (std::numeric_limits::has_infinity) { - DeviceSegmentedReduce::Reduce(workspace, workspace_size, - static_cast(x), static_cast(y), num_segments, - offset_start, offset_start+1, - cub::Max(), -std::numeric_limits::infinity(), s); + // to avoid compiler error: invalid argument type '__half' to unary expression on HIP... + if constexpr (std::is_same_v) + { + DeviceSegmentedReduce::Reduce(workspace, workspace_size, + static_cast(x), static_cast(y), num_segments, + offset_start, offset_start+1, + CUPY_CUB_NAMESPACE::Max(), half_negate_inf(), s); + } + else + { + DeviceSegmentedReduce::Reduce(workspace, workspace_size, + static_cast(x), static_cast(y), num_segments, + offset_start, offset_start+1, + CUPY_CUB_NAMESPACE::Max(), -std::numeric_limits::infinity(), s); + } } else { diff --git a/install/cupy_builder/_compiler.py b/install/cupy_builder/_compiler.py index bbb33d96e7d..dcf436b65e6 100644 --- a/install/cupy_builder/_compiler.py +++ b/install/cupy_builder/_compiler.py @@ -218,12 +218,10 @@ def _compile_unix_hipcc(self, obj: str, src: str, ext: Extension) -> None: base_opts = build.get_compiler_base_options(rocm_path) compiler_so = rocm_path - hip_version = build.get_hip_version() postargs = ['-O2', '-fPIC', '--include', 'hip_runtime.h'] - if hip_version >= 402: - postargs += ['--std=c++14'] - else: - postargs += ['--std=c++11'] + # Note: we only support ROCm 4.3+ since CuPy v11.0.0. + # Bumping C++ standard from C++14 to C++17 for "if constexpr" + postargs += ['--std=c++17'] print('HIPCC options:', postargs) self.spawn(compiler_so + base_opts + cc_args + [src, '-o', obj] + postargs)