Skip to content

Commit

Permalink
Clean up benchmarks/gups
Browse files Browse the repository at this point in the history
* Use stdlib for time and random numbers
* Use Kokkos default spaces rather than explicit spaces
* add a permutation mode
  • Loading branch information
cwpearson committed Sep 15, 2023
1 parent 615fc1a commit 002cce0
Showing 1 changed file with 96 additions and 76 deletions.
172 changes: 96 additions & 76 deletions benchmarks/gups/gups-kokkos.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,125 +14,144 @@
//
//@HEADER

/*! \brief file gups.cpp
An implementation of something like HPCC RandomAccess.
*/

#include "Kokkos_Core.hpp"
#include <cstdio>
#include <cstdlib>
#include <cmath>

#include <sys/time.h>
#include <chrono>
#include <numeric>
#include <algorithm>
#include <random>

#define HLINE "-------------------------------------------------------------\n"

#if defined(KOKKOS_ENABLE_CUDA)
using GUPSHostArray = Kokkos::View<int64_t*, Kokkos::CudaSpace>::HostMirror;
using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::CudaSpace>;
#else
using GUPSHostArray = Kokkos::View<int64_t*, Kokkos::HostSpace>::HostMirror;
using GUPSDeviceArray = Kokkos::View<int64_t*, Kokkos::HostSpace>;
#endif
using Index = int;
using Datum = int64_t;

using GUPSIndex = int;
using IndexView = Kokkos::View<Index*>;
using DataView = Kokkos::View<Datum*>;

double now() {
struct timeval now;
gettimeofday(&now, nullptr);
using Clock = std::chrono::steady_clock;
using Duration = std::chrono::duration<double>;

return (double)now.tv_sec + ((double)now.tv_usec * 1.0e-6);
using RandomDevice = std::random_device;
using RNG = std::mt19937;

IndexView randomized_indices(const Index indicesCount, const Index dataCount,
RNG& rng) {
// generate random indices 0..dataCount
std::uniform_int_distribution<Index> uid(0, dataCount);
std::vector<Index> indices(indicesCount);
std::generate(indices.begin(), indices.end(), [&]() { return uid(rng); });

// Copy to the default space and return
Kokkos::View<Index*, Kokkos::HostSpace, Kokkos::MemoryUnmanaged>
unmanaged_indices(indices.data(), indices.size());
IndexView dev_indices("dev_indices", indicesCount);
Kokkos::deep_copy(dev_indices, unmanaged_indices);
return dev_indices;
}

void randomize_indices(GUPSHostArray& indices, GUPSDeviceArray& dev_indices,
const int64_t dataCount) {
for (GUPSIndex i = 0; i < indices.extent(0); ++i) {
indices[i] = lrand48() % dataCount;
IndexView permuted_indices(const Index indicesCount, const Index dataCount,
RNG& rng) {
// create a permutation array of offsets into the data
std::vector<Index> perm(dataCount);
std::iota(perm.begin(), perm.end(), 0);
std::shuffle(perm.begin(), perm.end(), rng);

// indices is repeated copies of the permutation array
// (or the first entries of the permutation array if there
// are fewer indices than data elements)
IndexView dev_indices("dev_indices", indicesCount);
auto indices = Kokkos::create_mirror_view(dev_indices);
for (Index i = 0; i < Index(indices.extent(0)); ++i) {
indices(i) = perm[i % perm.size()];
}

// Copy to the default space and return

Kokkos::deep_copy(dev_indices, indices);
return dev_indices;
}

void run_gups(GUPSDeviceArray& indices, GUPSDeviceArray& data,
const int64_t datum, const bool performAtomics) {
void run_gups(IndexView& indices, DataView& data, const Datum datum,
const bool performAtomics) {
if (performAtomics) {
Kokkos::parallel_for(
"bench-gups-atomic", indices.extent(0),
KOKKOS_LAMBDA(const GUPSIndex i) {
"bench-gups-atomic", indices.extent(0), KOKKOS_LAMBDA(const Index i) {
Kokkos::atomic_fetch_xor(&data[indices[i]], datum);
});
} else {
Kokkos::parallel_for(
"bench-gups-non-atomic", indices.extent(0),
KOKKOS_LAMBDA(const GUPSIndex i) { data[indices[i]] ^= datum; });
KOKKOS_LAMBDA(const Index i) { data[indices[i]] ^= datum; });
}

Kokkos::fence();
}

int run_benchmark(const GUPSIndex indicesCount, const GUPSIndex dataCount,
const int repeats, const bool useAtomics) {
enum class AccessPattern { random, permutation };

int run_benchmark(const Index indicesCount, const Index dataCount,
const int repeats, const bool useAtomics,
const AccessPattern pattern) {
constexpr auto arbitrary_seed = 20230913;
RNG rng(arbitrary_seed);

printf("Reports fastest timing per kernel\n");
printf("Creating Views...\n");

printf("Memory Sizes:\n");
printf("- Elements: %15" PRIu64 " (%12.4f MB)\n",
static_cast<uint64_t>(dataCount),
1.0e-6 * ((double)dataCount * (double)sizeof(int64_t)));
1.0e-6 * ((double)dataCount * (double)sizeof(Datum)));
printf("- Indices: %15" PRIu64 " (%12.4f MB)\n",
static_cast<uint64_t>(indicesCount),
1.0e-6 * ((double)indicesCount * (double)sizeof(int64_t)));
1.0e-6 * ((double)indicesCount * (double)sizeof(Index)));
printf(" - Atomics: %15s\n", (useAtomics ? "Yes" : "No"));
printf("Benchmark kernels will be performed for %d iterations.\n", repeats);

printf(HLINE);

GUPSDeviceArray dev_indices("indices", indicesCount);
GUPSDeviceArray dev_data("data", dataCount);
int64_t datum = -1;

GUPSHostArray indices = Kokkos::create_mirror_view(dev_indices);
GUPSHostArray data = Kokkos::create_mirror_view(dev_data);

double gupsTime = 0.0;

printf("Initializing Views...\n");

#if defined(KOKKOS_HAVE_OPENMP)
Kokkos::parallel_for(
"init-data", Kokkos::RangePolicy<Kokkos::OpenMP>(0, dataCount),
#else
printf("Initializing Data...\n");
DataView data("data", dataCount);
Kokkos::parallel_for(
"init-data", Kokkos::RangePolicy<Kokkos::Serial>(0, dataCount),
#endif
"init-data",
Kokkos::RangePolicy<Kokkos::DefaultExecutionSpace>(0, dataCount),
KOKKOS_LAMBDA(const int i) { data[i] = 10101010101; });

#if defined(KOKKOS_HAVE_OPENMP)
Kokkos::parallel_for(
"init-indices", Kokkos::RangePolicy<Kokkos::OpenMP>(0, indicesCount),
#else
Kokkos::parallel_for(
"init-indices", Kokkos::RangePolicy<Kokkos::Serial>(0, indicesCount),
#endif
KOKKOS_LAMBDA(const int i) { indices[i] = 0; });

Kokkos::deep_copy(dev_data, data);
Kokkos::deep_copy(dev_indices, indices);
double start;

printf("Starting benchmarking...\n");
double gupsTime = 0.0;
constexpr Datum datum = -1;
for (Index k = 0; k < repeats; ++k) {
IndexView indices;
switch (pattern) {
case AccessPattern::random: {
indices = randomized_indices(indicesCount, dataCount, rng);
break;
}
case AccessPattern::permutation: {
indices = permuted_indices(indicesCount, dataCount, rng);
break;
}
default: {
throw std::runtime_error("unexpected mode");
}
}

for (GUPSIndex k = 0; k < repeats; ++k) {
randomize_indices(indices, dev_indices, data.extent(0));

start = now();
run_gups(dev_indices, dev_data, datum, useAtomics);
gupsTime += now() - start;
auto start = Clock::now();
run_gups(indices, data, datum, useAtomics);
gupsTime += Duration(Clock::now() - start).count();
}

Kokkos::deep_copy(indices, dev_indices);
Kokkos::deep_copy(data, dev_data);

printf(HLINE);
printf(
"GUP/s Random: %18.6f\n",
(1.0e-9 * ((double)repeats) * (double)dev_indices.extent(0)) / gupsTime);
printf("GUP/s Random: %18.6f\n",
(1.0e-9 * ((double)repeats) * (double)indicesCount) / gupsTime);
printf(HLINE);

return 0;
Expand All @@ -143,14 +162,13 @@ int main(int argc, char* argv[]) {
printf("Kokkos GUPS Benchmark\n");
printf(HLINE);

srand48(1010101);

Kokkos::initialize(argc, argv);

int64_t indices = 8192;
int64_t data = 33554432;
int64_t repeats = 10;
bool useAtomics = false;
int64_t indices = 8192;
int64_t data = 33554432;
int64_t repeats = 10;
bool useAtomics = false;
AccessPattern pattern = AccessPattern::random;

for (int i = 1; i < argc; ++i) {
if (strcmp(argv[i], "--indices") == 0) {
Expand All @@ -164,10 +182,12 @@ int main(int argc, char* argv[]) {
++i;
} else if (strcmp(argv[i], "--atomics") == 0) {
useAtomics = true;
} else if (strcmp(argv[i], "--pattern-permutation") == 0) {
pattern = AccessPattern::permutation;
}
}

const int rc = run_benchmark(indices, data, repeats, useAtomics);
const int rc = run_benchmark(indices, data, repeats, useAtomics, pattern);

Kokkos::finalize();

Expand Down

0 comments on commit 002cce0

Please sign in to comment.