Skip to content

Commit

Permalink
Fix a memory bug in the free_state function of random pools. (kokko…
Browse files Browse the repository at this point in the history
…s#6290)

* Insert memory fences in random pools' free_state() function between state update and lock release operations.

* added unit test to check for duplicate streams generated by random pools

* apply clang-format-8 on source files

* Addressed reviewer comments

* little refactor

* skip openmp target for now

* Skip the test for SYCL+CUDA platform
  • Loading branch information
Shihab-Shahriar committed Jul 21, 2023
1 parent 2dfb861 commit 4d1c6c3
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 1 deletion.
6 changes: 5 additions & 1 deletion algorithms/src/Kokkos_Random.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -956,6 +956,8 @@ class Random_XorShift64_Pool {
KOKKOS_INLINE_FUNCTION
void free_state(const Random_XorShift64<DeviceType>& state) const {
state_(state.state_idx_, 0) = state.state_;
// Release the lock only after the state has been updated in memory
Kokkos::memory_fence();
locks_(state.state_idx_, 0) = 0;
}
};
Expand Down Expand Up @@ -1208,7 +1210,9 @@ class Random_XorShift1024_Pool {
KOKKOS_INLINE_FUNCTION
void free_state(const Random_XorShift1024<DeviceType>& state) const {
for (int i = 0; i < 16; i++) state_(state.state_idx_, i) = state.state_[i];
p_(state.state_idx_, 0) = state.p_;
p_(state.state_idx_, 0) = state.p_;
// Release the lock only after the state has been updated in memory
Kokkos::memory_fence();
locks_(state.state_idx_, 0) = 0;
}
};
Expand Down
92 changes: 92 additions & 0 deletions algorithms/unit_tests/TestRandom.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
#include <Kokkos_Random.hpp>
#include <cmath>
#include <chrono>
#include <vector>
#include <algorithm>
#include <numeric>

namespace Test {
namespace AlgoRandomImpl {
Expand Down Expand Up @@ -469,6 +472,73 @@ struct TestDynRankView {
}
};

template <class ExecutionSpace, class GeneratorPool>
struct generate_random_stream {
using ViewType = Kokkos::View<uint64_t**, ExecutionSpace>;

ViewType vals;
GeneratorPool rand_pool;
int samples;

generate_random_stream(ViewType vals_, GeneratorPool rand_pool_, int samples_)
: vals(vals_), rand_pool(rand_pool_), samples(samples_) {}

KOKKOS_INLINE_FUNCTION
void operator()(int i) const {
typename GeneratorPool::generator_type rand_gen = rand_pool.get_state();

for (int k = 0; k < samples; k++) vals(i, k) = rand_gen.urand64();

rand_pool.free_state(rand_gen);
}
};

// NOTE: this doesn't test the statistical independence of multiple streams
// generated by a Random pool, it only tests for complete duplicates.
template <class ExecutionSpace, class Pool>
void test_duplicate_stream() {
using ViewType = Kokkos::View<uint64_t**, ExecutionSpace>;

// Heuristic to create a "large enough" number of streams.
int n_streams = ExecutionSpace{}.concurrency() * 4;
int samples = 8;

Pool rand_pool(42);
ViewType vals_d("Vals", n_streams, samples);

Kokkos::parallel_for(
Kokkos::RangePolicy<ExecutionSpace>(0, n_streams),
generate_random_stream<ExecutionSpace, Pool>(vals_d, rand_pool, samples));

auto vals_h =
Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, vals_d);

/*
To quickly find streams that are identical, we sort them by the first number,
if that's equal then the second and so on. We then test each neighbor pair
for duplicates.
*/
std::vector<size_t> indices(n_streams);
std::iota(indices.begin(), indices.end(), 0);

auto comparator = [&](int i, int j) {
for (int k = 0; k < samples; k++) {
if (vals_h(i, k) != vals_h(j, k)) return vals_h(i, k) < vals_h(j, k);
}
return false;
};
std::sort(indices.begin(), indices.end(), comparator);

for (int i = 0; i < n_streams - 1; i++) {
int idx1 = indices[i];
int idx2 = indices[i + 1];

int k = 0;
while (k < samples && vals_h(idx1, k) == vals_h(idx2, k)) k++;
ASSERT_LT(k, samples) << "Duplicate streams found";
}
}

} // namespace AlgoRandomImpl

TEST(TEST_CATEGORY, Random_XorShift64) {
Expand Down Expand Up @@ -509,5 +579,27 @@ TEST(TEST_CATEGORY, Random_XorShift1024_0) {
.run();
}

TEST(TEST_CATEGORY, Multi_streams) {
using ExecutionSpace = TEST_EXECSPACE;
#ifdef KOKKOS_ENABLE_OPENMPTARGET
if constexpr (std::is_same_v<ExecutionSpace,
Kokkos::Experimental::OpenMPTarget>) {
GTEST_SKIP() << "Libomptarget error"; // FIXME_OPENMPTARGET
}
#endif

#if defined(KOKKOS_ENABLE_SYCL) && defined(KOKKOS_IMPL_ARCH_NVIDIA_GPU)
if constexpr (std::is_same_v<ExecutionSpace, Kokkos::Experimental::SYCL>) {
GTEST_SKIP() << "Failing on NVIDIA GPUs"; // FIXME_SYCL
}
#endif

using Pool64 = Kokkos::Random_XorShift64_Pool<ExecutionSpace>;
using Pool1024 = Kokkos::Random_XorShift1024_Pool<ExecutionSpace>;

AlgoRandomImpl::test_duplicate_stream<ExecutionSpace, Pool64>();
AlgoRandomImpl::test_duplicate_stream<ExecutionSpace, Pool1024>();
}

} // namespace Test
#endif

0 comments on commit 4d1c6c3

Please sign in to comment.