Skip to content

Commit

Permalink
Add ParallelScanRangePolicy test
Browse files Browse the repository at this point in the history
Consolidate parallel_scan() tests using RangePolicy into one new file.
Remove redundant tests.
  • Loading branch information
tcclevenger committed Mar 7, 2023
1 parent 4bf2c5c commit d7896e6
Show file tree
Hide file tree
Showing 6 changed files with 269 additions and 346 deletions.
3 changes: 1 addition & 2 deletions core/unit_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL)
MinMaxClamp
NumericTraits
Other
ParallelScanRangePolicy
QuadPrecisionMath
RangePolicy
RangePolicyConstructors
Expand All @@ -185,7 +186,6 @@ foreach(Tag Threads;Serial;OpenMP;Cuda;HPX;OpenMPTarget;OpenACC;HIP;SYCL)
Reducers_e
Reductions
Reductions_DeviceView
Scan
SharedAlloc
)
set(file ${dir}/Test${Tag}_${Name}.cpp)
Expand Down Expand Up @@ -441,7 +441,6 @@ IF(KOKKOS_ENABLE_OPENMPTARGET AND KOKKOS_CXX_COMPILER_ID STREQUAL NVHPC)
${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Reducers_d.cpp
${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Reducers_e.cpp
${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Reductions_DeviceView.cpp
${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_Scan.cpp
${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_b.cpp
${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_c01.cpp
${CMAKE_CURRENT_BINARY_DIR}/openmptarget/TestOpenMPTarget_SubView_c02.cpp
Expand Down
20 changes: 10 additions & 10 deletions core/unit_test/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ ifneq ($(KOKKOS_INTERNAL_USE_RDYNAMIC), 1)
KOKKOS_INTERNAL_USE_RDYNAMIC := $(call kokkos_has_string,$(CXXFLAGS),rdynamic)
endif

ifeq ($(KOKKOS_INTERNAL_USE_RDYNAMIC),1)
ifneq ($(KOKKOS_INTERNAL_HAS_OPTIMIZATIONS),1)
ifeq ($(KOKKOS_INTERNAL_USE_RDYNAMIC),1)
ifneq ($(KOKKOS_INTERNAL_HAS_OPTIMIZATIONS),1)
STACK_TRACE_TERMINATE_FILTER :=_dynamic
else
STACK_TRACE_TERMINATE_FILTER :=
Expand All @@ -62,7 +62,7 @@ else
STACK_TRACE_TERMINATE_FILTER :=
endif

TESTS = AtomicOperations_int AtomicOperations_unsignedint AtomicOperations_longint AtomicOperations_unsignedlongint AtomicOperations_longlongint AtomicOperations_double AtomicOperations_float AtomicOperations_complexdouble AtomicOperations_complexfloat AtomicViews Atomics BlockSizeDeduction Concepts Complex Crs DeepCopyAlignment FunctorAnalysis Init LocalDeepCopy MDRange_a MDRange_b MDRange_c MDRange_d MDRange_e MDRange_f Other RangePolicy RangePolicyRequire Reductions Reducers_a Reducers_b Reducers_c Reducers_d Reducers_e Reductions_DeviceView Scan SharedAlloc TeamBasic TeamReductionScan TeamScratch TeamTeamSize TeamVectorRange UniqueToken ViewAPI_a ViewAPI_b ViewAPI_c ViewAPI_d ViewAPI_e ViewCopy_a ViewCopy_b ViewLayoutStrideAssignment ViewMapping_a ViewMapping_b ViewMapping_subview ViewOfClass WorkGraph View_64bit ViewResize
TESTS = AtomicOperations_int AtomicOperations_unsignedint AtomicOperations_longint AtomicOperations_unsignedlongint AtomicOperations_longlongint AtomicOperations_double AtomicOperations_float AtomicOperations_complexdouble AtomicOperations_complexfloat AtomicViews Atomics BlockSizeDeduction Concepts Complex Crs DeepCopyAlignment FunctorAnalysis Init LocalDeepCopy MDRange_a MDRange_b MDRange_c MDRange_d MDRange_e MDRange_f Other ParallelScanRangePolicy RangePolicy RangePolicyRequire Reductions Reducers_a Reducers_b Reducers_c Reducers_d Reducers_e Reductions_DeviceView SharedAlloc TeamBasic TeamReductionScan TeamScratch TeamTeamSize TeamVectorRange UniqueToken ViewAPI_a ViewAPI_b ViewAPI_c ViewAPI_d ViewAPI_e ViewCopy_a ViewCopy_b ViewLayoutStrideAssignment ViewMapping_a ViewMapping_b ViewMapping_subview ViewOfClass WorkGraph View_64bit ViewResize

tmp := $(foreach device, $(KOKKOS_DEVICELIST), \
tmp2 := $(foreach test, $(TESTS), \
Expand Down Expand Up @@ -127,7 +127,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
OBJ_CUDA += TestCuda_SubView_c07.o TestCuda_SubView_c08.o TestCuda_SubView_c09.o
OBJ_CUDA += TestCuda_SubView_c10.o TestCuda_SubView_c11.o TestCuda_SubView_c12.o
OBJ_CUDA += TestCuda_SubView_c13.o
OBJ_CUDA += TestCuda_Reductions.o TestCuda_Scan.o
OBJ_CUDA += TestCuda_Reductions.o TestCuda_ParallelScanRangePolicy.o
OBJ_CUDA += TestCuda_Reductions_DeviceView.o
OBJ_CUDA += TestCuda_Reducers_a.o TestCuda_Reducers_b.o TestCuda_Reducers_c.o TestCuda_Reducers_d.o TestCuda_Reducers_e.o
OBJ_CUDA += TestCuda_Complex.o
Expand Down Expand Up @@ -171,7 +171,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_THREADS), 1)
OBJ_THREADS += TestThreads_SubView_c04.o TestThreads_SubView_c05.o TestThreads_SubView_c06.o
OBJ_THREADS += TestThreads_SubView_c07.o TestThreads_SubView_c08.o TestThreads_SubView_c09.o
OBJ_THREADS += TestThreads_SubView_c10.o TestThreads_SubView_c11.o TestThreads_SubView_c12.o
OBJ_THREADS += TestThreads_Reductions.o TestThreads_Scan.o
OBJ_THREADS += TestThreads_Reductions.o TestThreads_ParallelScanRangePolicy.o
OBJ_THREADS += TestThreads_Reductions_DeviceView.o
OBJ_THREADS += TestThreads_Reducers_a.o TestThreads_Reducers_b.o TestThreads_Reducers_c.o TestThreads_Reducers_d.o TestThreads_Reducers_e.o
OBJ_THREADS += TestThreads_Complex.o
Expand Down Expand Up @@ -207,7 +207,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
OBJ_OPENMP += TestOpenMP_SubView_c07.o TestOpenMP_SubView_c08.o TestOpenMP_SubView_c09.o
OBJ_OPENMP += TestOpenMP_SubView_c10.o TestOpenMP_SubView_c11.o TestOpenMP_SubView_c12.o
OBJ_OPENMP += TestOpenMP_SubView_c13.o
OBJ_OPENMP += TestOpenMP_Reductions.o TestOpenMP_Scan.o
OBJ_OPENMP += TestOpenMP_Reductions.o TestOpenMP_ParallelScanRangePolicy.o
OBJ_OPENMP += TestOpenMP_Reductions_DeviceView.o
OBJ_OPENMP += TestOpenMP_Reducers_a.o TestOpenMP_Reducers_b.o TestOpenMP_Reducers_c.o TestOpenMP_Reducers_d.o TestOpenMP_Reducers_e.o
OBJ_OPENMP += TestOpenMP_Complex.o
Expand Down Expand Up @@ -251,11 +251,11 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
#OBJ_OPENMPTARGET += TestOpenMPTarget_SubView_c10.o TestOpenMPTarget_SubView_c11.o TestOpenMPTarget_SubView_c12.o
#OBJ_OPENMPTARGET += TestOpenMPTarget_Reductions.o # Need custom reductions
OBJ_OPENMPTARGET += TestOpenMPTarget_Reducers_a.o TestOpenMPTarget_Reducers_b.o TestOpenMPTarget_Reducers_c.o TestOpenMPTarget_Reducers_d.o TestOpenMPTarget_Reducers_e.o
#OBJ_OPENMPTARGET += TestOpenMPTarget_Scan.o
OBJ_OPENMPTARGET += TestOpenMPTarget_ParallelScanRangePolicy.o
OBJ_OPENMPTARGET += TestOpenMPTarget_Complex.o
OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_int.o TestOpenMPTarget_AtomicOperations_unsignedint.o TestOpenMPTarget_AtomicOperations_longint.o
OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_unsignedlongint.o TestOpenMPTarget_AtomicOperations_longlongint.o TestOpenMPTarget_AtomicOperations_double.o TestOpenMPTarget_AtomicOperations_float.o
#OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_complexfloat.o
#OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_complexfloat.o
#OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicOperations_complexdouble.o
OBJ_OPENMPTARGET += TestOpenMPTarget_AtomicViews.o
OBJ_OPENMPTARGET += TestOpenMPTarget_Atomics.o # Commented Out Arbitrary Type Atomics
Expand Down Expand Up @@ -316,7 +316,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_HPX), 1)
OBJ_HPX += TestHPX_SubView_c10.o TestHPX_SubView_c11.o TestHPX_SubView_c12.o
OBJ_HPX += TestHPX_SubView_c13.o
OBJ_HPX += TestHPX_Reductions.o
OBJ_HPX += TestHPX_Scan.o
OBJ_HPX += TestHPX_ParallelScanRangePolicy.o
OBJ_HPX += TestHPX_Reducers_a.o TestHPX_Reducers_b.o TestHPX_Reducers_c.o TestHPX_Reducers_d.o TestHPX_Reducers_e.o
OBJ_HPX += TestHPX_Complex.o
OBJ_HPX += TestHPX_AtomicOperations_int.o TestHPX_AtomicOperations_unsignedint.o TestHPX_AtomicOperations_longint.o
Expand Down Expand Up @@ -355,7 +355,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
OBJ_SERIAL += TestSerial_SubView_c07.o TestSerial_SubView_c08.o TestSerial_SubView_c09.o
OBJ_SERIAL += TestSerial_SubView_c10.o TestSerial_SubView_c11.o TestSerial_SubView_c12.o
OBJ_SERIAL += TestSerial_SubView_c13.o
OBJ_SERIAL += TestSerial_Reductions.o TestSerial_Scan.o
OBJ_SERIAL += TestSerial_Reductions.o TestSerial_ParallelScanRangePolicy.o
OBJ_SERIAL += TestSerial_Reductions_DeviceView.o
OBJ_SERIAL += TestSerial_Reducers_a.o TestSerial_Reducers_b.o TestSerial_Reducers_c.o TestSerial_Reducers_d.o TestSerial_Reducers_e.o
OBJ_SERIAL += TestSerial_Complex.o
Expand Down
253 changes: 253 additions & 0 deletions core/unit_test/TestParallelScanRangePolicy.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,253 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#include <gtest/gtest.h>

#include <Kokkos_Core.hpp>
#include <cstddef>
#include <cstdio>
#include <vector>

// This test checks parallel_scan() calls which use RangePolicy.

namespace {

// Functor plus test driver for Kokkos::parallel_scan() over a 1D range.
//
// The functor scans the sequence [0, 1, ..., N-1] and stores, for every
// index i, both the exclusive partial sum (prefix_results) and the
// inclusive partial sum (postfix_results). test_scan() launches the scan
// through each parallel_scan() signature and compares the stored values
// and the returned total against the closed-form sums.
template <typename ValueType>
struct TestParallelScanRangePolicy {
  // This alias is needed for parallel_scan() where a
  // work count is given (instead of a RangePolicy) so
  // that the execution space can be deduced internally.
  using execution_space = TEST_EXECSPACE;

  using ViewType = Kokkos::View<ValueType*, execution_space>;

  // Per-index scan results; (re)allocated by test_scan() for each work size.
  ViewType prefix_results;
  ViewType postfix_results;

  // Operator defining work done in parallel_scan.
  // Simple scan over [0,1,...,N-1].
  // Computes both prefix and postfix scans: `update` is stored before
  // adding i (sum of 0..i-1) and again after adding i (sum of 0..i).
  // Results are only written when final_pass is true.
  KOKKOS_INLINE_FUNCTION
  void operator()(const size_t i, ValueType& update, bool final_pass) const {
    if (final_pass) {
      prefix_results(i) = update;
    }
    update += i;
    if (final_pass) {
      postfix_results(i) = update;
    }
  }

  // Sets the scan value to the additive identity.
  KOKKOS_INLINE_FUNCTION
  void init(ValueType& update) const { update = 0; }

  // Combines two partial scan values by addition.
  KOKKOS_INLINE_FUNCTION
  void join(ValueType& update, const ValueType& input) const {
    update += input;
  }

  // Runs every parallel_scan() signature for a single work size.
  //
  // If no policy template Args are given, parallel_scan() is called with
  // the plain work count; otherwise it is called with
  // RangePolicy<execution_space, Args...>(0, work_size).
  template <typename... Args>
  void test_scan(const size_t work_size) {
    // Reset member data based on work_size
    prefix_results  = ViewType("prefix_results", work_size);
    postfix_results = ViewType("postfix_results", work_size);

    // Expected value returned by the scan: sum( 0 .. N-1 ).
    // For work_size == 0 the unsigned (work_size - 1) wraps around, but the
    // product with zero is still zero.
    const ValueType expected_total =
        ValueType(work_size * (work_size - 1) / 2);

    // Lambda for checking errors from stored value at each index.
    // NOTE(review): ASSERT_* placed in a lambda only returns from the
    // lambda on failure, so the reset below is skipped and the caller
    // keeps running -- confirm this is acceptable.
    auto check_scan_results = [&]() {
      auto const prefix_h = Kokkos::create_mirror_view_and_copy(
          Kokkos::HostSpace(), prefix_results);
      auto const postfix_h = Kokkos::create_mirror_view_and_copy(
          Kokkos::HostSpace(), postfix_results);

      for (size_t i = 0; i < work_size; ++i) {
        // Check prefix sum: sum( 0 .. i-1 ). At i == 0 the unsigned
        // (i - 1) wraps, but multiplying by i == 0 still yields 0.
        ASSERT_EQ(ValueType((i * (i - 1)) / 2), prefix_h(i));

        // Check postfix sum: sum( 0 .. i )
        ASSERT_EQ(ValueType(((i + 1) * i) / 2), postfix_h(i));
      }

      // Reset results so the next launch cannot pass on stale values.
      Kokkos::deep_copy(prefix_results, 0);
      Kokkos::deep_copy(postfix_results, 0);
    };

    // For each case, call parallel_scan() with all possible
    // function signatures.
    if (sizeof...(Args) == 0) {
      // Input: label, work_count, functor
      Kokkos::parallel_scan("TestWithStrArg1", work_size, *this);
      check_scan_results();

      // Input: work_count, functor
      Kokkos::parallel_scan(work_size, *this);
      check_scan_results();

      // Input: label, work_count, functor
      // Input/Output: return_value
      {
        ValueType return_val = 0;
        Kokkos::parallel_scan("TestWithStrArg2", work_size, *this, return_val);
        check_scan_results();
        ASSERT_EQ(expected_total, return_val);
      }

      // Input: work_count, functor
      // Input/Output: return_value
      {
        ValueType return_val = 0;
        Kokkos::parallel_scan(work_size, *this, return_val);
        check_scan_results();
        ASSERT_EQ(expected_total, return_val);
      }

      // Input: work_count, functor
      // Input/Output: return_view (host space)
      {
        Kokkos::View<ValueType, Kokkos::HostSpace> return_view("return_view");
        Kokkos::parallel_scan(work_size, *this, return_view);
        check_scan_results();
        ASSERT_EQ(expected_total, return_view());
      }
    } else {
      // Construct RangePolicy for parallel_scan
      // based on template Args and work_size.
      Kokkos::RangePolicy<execution_space, Args...> policy(0, work_size);

      // Input: label, policy, functor
      Kokkos::parallel_scan("TestWithStrArg3", policy, *this);
      check_scan_results();

      // Input: policy, functor
      Kokkos::parallel_scan(policy, *this);
      check_scan_results();

      // Input: label, policy, functor
      // Input/Output: return_value
      {
        ValueType return_val = 0;
        Kokkos::parallel_scan("TestWithStrArg4", policy, *this, return_val);
        check_scan_results();
        ASSERT_EQ(expected_total, return_val);
      }

      // Input: policy, functor
      // Input/Output: return_value
      {
        ValueType return_val = 0;
        Kokkos::parallel_scan(policy, *this, return_val);
        check_scan_results();
        ASSERT_EQ(expected_total, return_val);
      }

      // Input: policy, functor
      // Input/Output: return_view (View declared with execution_space)
      {
        Kokkos::View<ValueType, execution_space> return_view("return_view");
        Kokkos::parallel_scan(policy, *this, return_view);
        check_scan_results();

        // Copy the rank-0 view into a host scalar before comparing.
        ValueType total;
        Kokkos::deep_copy(total, return_view);
        ASSERT_EQ(expected_total, total);
      }

      // Check Kokkos::Experimental::require()
      // for one of the signatures.
      {
        using Property =
            Kokkos::Experimental::WorkItemProperty::HintLightWeight_t;
        const auto policy_with_require =
            Kokkos::Experimental::require(policy, Property());

        // Input: policy (with required property), functor
        // Input/Output: return_value
        ValueType return_val = 0;
        Kokkos::parallel_scan(policy_with_require, *this, return_val);
        check_scan_results();
        ASSERT_EQ(expected_total, return_val);
      }
    }
  }

  // Run test_scan() for a collection of work sizes.
  // The collection is taken by const reference to avoid copying it on
  // every call.
  template <typename... Args>
  void test_scan(const std::vector<size_t>& work_sizes) {
    for (const size_t work_size : work_sizes) {
      test_scan<Args...>(work_size);
    }
  }
};  // struct TestParallelScanRangePolicy

TEST(TEST_CATEGORY, parallel_scan_range_policy) {
{
TestParallelScanRangePolicy<char> f;

std::vector<size_t> work_sizes{5, 10};
f.test_scan<>(work_sizes);
f.test_scan<Kokkos::Schedule<Kokkos::Static>>(work_sizes);
f.test_scan<Kokkos::Schedule<Kokkos::Dynamic>>(work_sizes);
}
{
TestParallelScanRangePolicy<short int> f;

std::vector<size_t> work_sizes{50, 100};
f.test_scan<>(work_sizes);
f.test_scan<Kokkos::Schedule<Kokkos::Static>>(work_sizes);
f.test_scan<Kokkos::Schedule<Kokkos::Dynamic>>(work_sizes);
}
{
TestParallelScanRangePolicy<int> f;

std::vector<size_t> work_sizes{0, 1, 2, 1000, 1001};
f.test_scan<>(work_sizes);
f.test_scan<Kokkos::Schedule<Kokkos::Static>>(work_sizes);
f.test_scan<Kokkos::Schedule<Kokkos::Dynamic>>(work_sizes);
}
{
TestParallelScanRangePolicy<long int> f;

std::vector<size_t> work_sizes{1000, 10000};
f.test_scan<>(work_sizes);
f.test_scan<Kokkos::Schedule<Kokkos::Static>>(work_sizes);
f.test_scan<Kokkos::Schedule<Kokkos::Dynamic>>(work_sizes);
}
{
TestParallelScanRangePolicy<float> f;

std::vector<size_t> work_sizes{13, 34};
f.test_scan<>(work_sizes);
f.test_scan<Kokkos::Schedule<Kokkos::Static>>(work_sizes);
f.test_scan<Kokkos::Schedule<Kokkos::Dynamic>>(work_sizes);
}
{
TestParallelScanRangePolicy<double> f;

std::vector<size_t> work_sizes{17, 59};
f.test_scan<>(work_sizes);
f.test_scan<Kokkos::Schedule<Kokkos::Static>>(work_sizes);
f.test_scan<Kokkos::Schedule<Kokkos::Dynamic>>(work_sizes);
}
}
} // namespace

0 comments on commit d7896e6

Please sign in to comment.