Skip to content

Commit

Permalink
Revert "Merge pull request kokkos#5964 from PhilMiller/cuda-lambda-de…
Browse files Browse the repository at this point in the history
…fault"

This reverts commit 945281a, reversing
changes made to a45cc1e.
  • Loading branch information
dalg24 committed May 6, 2023
1 parent 5fa72b5 commit d7c06c4
Show file tree
Hide file tree
Showing 21 changed files with 82 additions and 31 deletions.
4 changes: 4 additions & 0 deletions .jenkins
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ pipeline {
-DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \
-DKokkos_ENABLE_TESTS=ON \
-DKokkos_ENABLE_CUDA=ON \
-DKokkos_ENABLE_CUDA_LAMBDA=ON \
-DKokkos_ENABLE_OPENMP=ON \
.. && \
make -j8 && ctest --verbose'''
Expand Down Expand Up @@ -312,6 +313,7 @@ pipeline {
-DKokkos_ENABLE_TESTS=ON \
-DKokkos_ENABLE_BENCHMARKS=ON \
-DKokkos_ENABLE_CUDA=ON \
-DKokkos_ENABLE_CUDA_LAMBDA=ON \
-DKokkos_ENABLE_TUNING=ON \
-DKokkos_ARCH_VOLTA70=ON \
.. && \
Expand Down Expand Up @@ -384,6 +386,7 @@ pipeline {
-DKokkos_ENABLE_COMPILER_WARNINGS=ON \
-DKokkos_ENABLE_OPENMP=OFF \
-DKokkos_ENABLE_CUDA=ON \
-DKokkos_ENABLE_CUDA_LAMBDA=OFF \
-DKokkos_ENABLE_CUDA_UVM=ON \
-DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=ON \
-DKokkos_ENABLE_DEPRECATED_CODE_3=ON \
Expand Down Expand Up @@ -450,6 +453,7 @@ pipeline {
-DKokkos_ENABLE_TESTS=ON \
-DKokkos_ENABLE_BENCHMARKS=ON \
-DKokkos_ENABLE_CUDA=ON \
-DKokkos_ENABLE_CUDA_LAMBDA=ON \
-DKokkos_ENABLE_LIBDL=OFF \
.. && \
make -j8 && ctest --verbose && \
Expand Down
14 changes: 8 additions & 6 deletions Makefile.kokkos
Original file line number Diff line number Diff line change
Expand Up @@ -665,13 +665,15 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
endif
endif

ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
KOKKOS_CXXFLAGS += -expt-extended-lambda
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
KOKKOS_CXXFLAGS += -expt-extended-lambda
endif

ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(call kokkos_append_header,"$H""define KOKKOS_ENABLE_CUDA_LAMBDA")
endif
endif

ifeq ($(KOKKOS_INTERNAL_CUDA_USE_CONSTEXPR), 1)
Expand Down
1 change: 1 addition & 0 deletions cmake/KokkosCore_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@

#cmakedefine KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
#cmakedefine KOKKOS_ENABLE_CUDA_UVM
#cmakedefine KOKKOS_ENABLE_CUDA_LAMBDA
#cmakedefine KOKKOS_ENABLE_CUDA_CONSTEXPR
#cmakedefine KOKKOS_ENABLE_IMPL_CUDA_MALLOC_ASYNC
#cmakedefine KOKKOS_ENABLE_HIP_RELOCATABLE_DEVICE_CODE
Expand Down
17 changes: 3 additions & 14 deletions cmake/kokkos_arch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -162,21 +162,10 @@ ENDIF()
#clear anything that might be in the cache
GLOBAL_SET(KOKKOS_CUDA_OPTIONS)
# Construct the Makefile options
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
# Extended lambda support was stabilized in nvcc 12
IF(KOKKOS_COMPILER_VERSION_MAJOR EQUAL 11)
IF (KOKKOS_ENABLE_CUDA_LAMBDA)
IF(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-expt-extended-lambda")
ELSE()
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-extended-lambda")
ENDIF()
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-Wext-lambda-captures-this")
ENDIF()

IF(DEFINED Kokkos_ENABLE_CUDA_LAMBDA)
IF(Kokkos_ENABLE_CUDA_LAMBDA)
MESSAGE(DEPRECATION "CUDA extended lambda support is now always enabled. The option Kokkos_ENABLE_CUDA_LAMBDA will be removed")
ELSE()
MESSAGE(FATAL_ERROR "Support for disabling CUDA extended lambdas has been removed. Please unset Kokkos_ENABLE_CUDA_LAMBDA, or see #5964 if this is necessary for your application")
GLOBAL_APPEND(KOKKOS_CUDA_OPTIONS "-Wext-lambda-captures-this")
ENDIF()
ENDIF()

Expand Down
10 changes: 9 additions & 1 deletion cmake/kokkos_enable_options.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,14 @@ mark_as_advanced(Kokkos_ENABLE_IMPL_MDSPAN)
mark_as_advanced(Kokkos_ENABLE_MDSPAN_EXTERNAL)
mark_as_advanced(Kokkos_ENABLE_IMPL_SKIP_COMPILER_MDSPAN)

IF (Trilinos_ENABLE_Kokkos AND TPL_ENABLE_CUDA)
SET(CUDA_LAMBDA_DEFAULT ON)
ELSEIF (KOKKOS_ENABLE_CUDA)
SET(CUDA_LAMBDA_DEFAULT ON)
ELSE()
SET(CUDA_LAMBDA_DEFAULT OFF)
ENDIF()
KOKKOS_ENABLE_OPTION(CUDA_LAMBDA ${CUDA_LAMBDA_DEFAULT} "Whether to activate experimental lambda features")
IF (Trilinos_ENABLE_Kokkos)
SET(COMPLEX_ALIGN_DEFAULT OFF)
ELSE()
Expand Down Expand Up @@ -115,7 +123,7 @@ FUNCTION(check_device_specific_options)
ENDIF()
ENDFUNCTION()

CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_CONSTEXPR CUDA_LDG_INTRINSIC)
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE CUDA OPTIONS CUDA_UVM CUDA_RELOCATABLE_DEVICE_CODE CUDA_LAMBDA CUDA_CONSTEXPR CUDA_LDG_INTRINSIC)
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HIP OPTIONS HIP_RELOCATABLE_DEVICE_CODE)
CHECK_DEVICE_SPECIFIC_OPTIONS(DEVICE HPX OPTIONS IMPL_HPX_ASYNC_DISPATCH)

Expand Down
2 changes: 2 additions & 0 deletions containers/unit_tests/TestErrorReporter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ struct ErrorReporterDriver : public ErrorReporterDriverBase<DeviceType> {
}
};

#if !defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_LAMBDA)
template <typename DeviceType>
struct ErrorReporterDriverUseLambda
: public ErrorReporterDriverBase<DeviceType> {
Expand Down Expand Up @@ -177,6 +178,7 @@ struct ErrorReporterDriverUseLambda
driver_base::check_expectations(reporter_capacity, test_size);
}
};
#endif

#ifdef KOKKOS_ENABLE_OPENMP
struct ErrorReporterDriverNativeOpenMP
Expand Down
14 changes: 14 additions & 0 deletions containers/unit_tests/TestOffsetView.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ void test_offsetview_construction() {
ASSERT_EQ(ov.extent(0), 5u);
ASSERT_EQ(ov.extent(1), 5u);

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
{
Kokkos::Experimental::OffsetView<Scalar*, Device> offsetV1("OneDOffsetView",
range0);
Expand Down Expand Up @@ -148,6 +149,7 @@ void test_offsetview_construction() {
}

ASSERT_EQ(OVResult, answer) << "Bad data found in OffsetView";
#endif

{
offset_view_type ovCopy(ov);
Expand Down Expand Up @@ -182,6 +184,7 @@ void test_offsetview_construction() {
range3_type rangePolicy3DZero(point3_type{{0, 0, 0}},
point3_type{{extent0, extent1, extent2}});

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
int view3DSum = 0;
Kokkos::parallel_reduce(
rangePolicy3DZero,
Expand All @@ -204,6 +207,7 @@ void test_offsetview_construction() {

ASSERT_EQ(view3DSum, offsetView3DSum)
<< "construction of OffsetView from View and begins array broken.";
#endif
}
view_type viewFromOV = ov.view();

Expand All @@ -228,6 +232,7 @@ void test_offsetview_construction() {
view_type aView("aView", ov.extent(0), ov.extent(1));
Kokkos::deep_copy(aView, ov);

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
int sum = 0;
Kokkos::parallel_reduce(
rangePolicy2D,
Expand All @@ -237,6 +242,7 @@ void test_offsetview_construction() {
sum);

ASSERT_EQ(sum, 0) << "deep_copy(view, offsetView) broken.";
#endif
}

{ // test view to offsetview deep copy
Expand All @@ -245,6 +251,7 @@ void test_offsetview_construction() {
Kokkos::deep_copy(aView, 99);
Kokkos::deep_copy(ov, aView);

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
int sum = 0;
Kokkos::parallel_reduce(
rangePolicy2D,
Expand All @@ -254,6 +261,7 @@ void test_offsetview_construction() {
sum);

ASSERT_EQ(sum, 0) << "deep_copy(offsetView, view) broken.";
#endif
}
}

Expand Down Expand Up @@ -421,6 +429,7 @@ void test_offsetview_subview() {
ASSERT_EQ(offsetSubview.begin(1), 0);
ASSERT_EQ(offsetSubview.end(1), 9);

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
using range_type = Kokkos::MDRangePolicy<Device, Kokkos::Rank<2>,
Kokkos::IndexType<int> >;
using point_type = typename range_type::point_type;
Expand All @@ -446,6 +455,7 @@ void test_offsetview_subview() {
sum);

ASSERT_EQ(sum, 6 * (e0 - b0) * (e1 - b1));
#endif
}

// slice 2
Expand Down Expand Up @@ -542,6 +552,7 @@ void test_offsetview_subview() {
}
}

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
template <class InputIt, class T, class BinaryOperation>
KOKKOS_INLINE_FUNCTION T std_accumulate(InputIt first, InputIt last, T init,
BinaryOperation op) {
Expand Down Expand Up @@ -644,6 +655,7 @@ void test_offsetview_offsets_rank3() {

ASSERT_EQ(0, errors);
}
#endif

TEST(TEST_CATEGORY, offsetview_construction) {
test_offsetview_construction<int, TEST_EXECSPACE>();
Expand All @@ -657,6 +669,7 @@ TEST(TEST_CATEGORY, offsetview_subview) {
test_offsetview_subview<int, TEST_EXECSPACE>();
}

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
TEST(TEST_CATEGORY, offsetview_offsets_rank1) {
test_offsetview_offsets_rank1<TEST_EXECSPACE>();
}
Expand All @@ -668,6 +681,7 @@ TEST(TEST_CATEGORY, offsetview_offsets_rank2) {
TEST(TEST_CATEGORY, offsetview_offsets_rank3) {
test_offsetview_offsets_rank3<TEST_EXECSPACE>();
}
#endif

} // namespace Test

Expand Down
2 changes: 2 additions & 0 deletions core/perf_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,12 @@ KOKKOS_ADD_BENCHMARK(
SOURCES ${BENCHMARK_SOURCES}
)

IF(NOT KOKKOS_ENABLE_CUDA OR KOKKOS_ENABLE_CUDA_LAMBDA)
KOKKOS_ADD_BENCHMARK(
Benchmark_Atomic_MinMax
SOURCES test_atomic_minmax_simple.cpp
)
ENDIF()

# FIXME_NVHPC
IF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
Expand Down
2 changes: 2 additions & 0 deletions core/perf_test/PerfTest_ViewAllocate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ BENCHMARK(ViewAllocate_Rank8<Kokkos::LayoutRight>)
->Arg(N)
->UseManualTime();

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
BENCHMARK(ViewAllocate_Raw<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
Expand All @@ -226,5 +227,6 @@ BENCHMARK(ViewAllocate_Raw<Kokkos::LayoutRight>)
->ArgName("N")
->Arg(N)
->UseManualTime();
#endif

} // namespace Test
2 changes: 2 additions & 0 deletions core/perf_test/PerfTest_ViewCopy_Raw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

namespace Test {

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
BENCHMARK(ViewDeepCopy_Raw<Kokkos::LayoutLeft, Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(10)
Expand All @@ -37,5 +38,6 @@ BENCHMARK(ViewDeepCopy_Raw<Kokkos::LayoutRight, Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(10)
->UseManualTime();
#endif

} // namespace Test
2 changes: 2 additions & 0 deletions core/perf_test/PerfTest_ViewFill_Raw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

namespace Test {

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
BENCHMARK(ViewFill_Raw<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
Expand All @@ -27,5 +28,6 @@ BENCHMARK(ViewFill_Raw<Kokkos::LayoutRight>)
->ArgName("N")
->Arg(N)
->UseManualTime();
#endif

} // namespace Test
2 changes: 2 additions & 0 deletions core/perf_test/PerfTest_ViewResize_Raw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

namespace Test {

#if defined(KOKKOS_ENABLE_CUDA_LAMBDA) || !defined(KOKKOS_ENABLE_CUDA)
BENCHMARK(ViewResize_NoInit_Raw<Kokkos::LayoutLeft>)
->ArgName("N")
->Arg(N)
Expand All @@ -29,5 +30,6 @@ BENCHMARK(ViewResize_NoInit_Raw<Kokkos::LayoutRight>)
->Arg(N)
->UseManualTime()
->Iterations(R);
#endif

} // namespace Test
2 changes: 0 additions & 2 deletions core/src/Kokkos_Macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -547,8 +547,6 @@ static constexpr bool kokkos_omp_on_host() { return false; }

#if defined(KOKKOS_ENABLE_CUDA) && defined(KOKKOS_ENABLE_DEPRECATED_CODE_4)
#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC
// This was previously defined from the configuration option which was removed
#define KOKKOS_ENABLE_CUDA_LAMBDA
#endif

#define KOKKOS_INVALID_INDEX (~std::size_t(0))
Expand Down
6 changes: 6 additions & 0 deletions core/src/setup/Kokkos_Setup_Cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,15 @@
#error "Cuda device capability >= 3.0 is required."
#endif

#ifdef KOKKOS_ENABLE_CUDA_LAMBDA
#define KOKKOS_LAMBDA [=] __host__ __device__

#define KOKKOS_CLASS_LAMBDA [ =, *this ] __host__ __device__

#else // !defined(KOKKOS_ENABLE_CUDA_LAMBDA)
#undef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
#endif // !defined(KOKKOS_ENABLE_CUDA_LAMBDA)

#define KOKKOS_IMPL_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__
#define KOKKOS_IMPL_FORCEINLINE __forceinline__
#define KOKKOS_IMPL_INLINE_FUNCTION __device__ __host__ inline
Expand Down
6 changes: 6 additions & 0 deletions core/unit_test/TestCompilerMacros.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,15 @@
#error "Only one host compiler macro can be defined"
#endif

#if defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_CUDA_LAMBDA)
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#error "Macro bug: KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA shouldn't be defined"
#endif
#else
#if !defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
#error "Macro bug: KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA should be defined"
#endif
#endif

namespace TestCompilerMacros {

Expand Down
2 changes: 2 additions & 0 deletions core/unit_test/TestMDRangeReduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ TEST(TEST_CATEGORY, mdrange_parallel_reduce_primitive_types) {
#if defined(KOKKOS_ENABLE_OPENMPTARGET)
GTEST_SKIP() << "FIXME OPENMPTARGET Tests of MDRange reduce over values "
"smaller than int would fail";
#elif defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_CUDA_LAMBDA)
GTEST_SKIP() << "Skipped ENABLE_CUDA_LAMBDA";
#else
for (int bound : {0, 1, 7, 32, 65, 7000}) {
for (int k = 0; k < bound; ++k) {
Expand Down
6 changes: 6 additions & 0 deletions core/unit_test/TestTeamMDRange.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,10 @@ struct TestTeamMDParallelFor {
}
};

// If KOKKOS_ENABLE_CUDA_LAMBDA is off, extended lambdas used in parallel_for
// and parallel_reduce in these tests will not compile correctly
#if !defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_CUDA_LAMBDA)

template <typename ExecSpace>
struct TestTeamThreadMDRangeParallelFor : public TestTeamMDParallelFor {
using TeamType = typename Kokkos::TeamPolicy<ExecSpace>::member_type;
Expand Down Expand Up @@ -1959,5 +1963,7 @@ TEST(TEST_CATEGORY, TeamVectorMDRangeParallelReduce) {
test_parallel_reduce_for_8D_TeamVectorMDRange<Right>(smallDims);
}

#endif

} // namespace TeamMDRange
} // namespace Test

0 comments on commit d7c06c4

Please sign in to comment.