Merge pull request #3421 from anagainaru/cuda-testing
Testing the GPU backend with correct and incorrect memory spaces
anagainaru committed Dec 27, 2022
2 parents 83b798f + 38f34bc commit cf22992
Showing 2 changed files with 191 additions and 13 deletions.
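
For context, a minimal sketch of the pattern the new tests exercise: declaring a variable's memory space before Put() so the engine knows whether the buffer lives in host or GPU memory. This is not part of the commit; the file name, IO name, and data values are illustrative, and it assumes an ADIOS2 build with CUDA support and the public <adios2.h> header. The tests below additionally declare the wrong space on purpose (a Host space with a device pointer, and vice versa, on both Put and Get) and expect the operation to fail.

// Minimal sketch (not from the commit): write a GPU buffer with an explicit
// memory space. The ADIOS2 calls below all appear in the test diff; the CUDA
// runtime calls are standard cudaMalloc/cudaMemcpy/cudaFree.
#include <adios2.h>
#include <cuda_runtime.h>
#include <numeric>
#include <vector>

int main()
{
    const size_t Nx = 5;
    std::vector<float> hostData(Nx);
    std::iota(hostData.begin(), hostData.end(), 0.0f);

    // Stage the data on the GPU
    float *gpuData = nullptr;
    cudaMalloc(&gpuData, Nx * sizeof(float));
    cudaMemcpy(gpuData, hostData.data(), Nx * sizeof(float),
               cudaMemcpyHostToDevice);

    adios2::ADIOS adios;
    adios2::IO io = adios.DeclareIO("SketchIO");
    io.SetEngine("BP5");
    auto var = io.DefineVariable<float>("r32", {Nx}, {0}, {Nx});

    adios2::Engine writer = io.Open("Sketch.bp", adios2::Mode::Write);
    writer.BeginStep();
    // Correct pairing: a CUDA memory space with a device pointer. The new
    // tests also pass mismatched combinations and expect Put/Get to fail.
    var.SetMemorySpace(adios2::MemorySpace::CUDA);
    writer.Put(var, gpuData);
    writer.EndStep();
    writer.Close();

    cudaFree(gpuData);
    return 0;
}
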
2 changes: 0 additions & 2 deletions source/adios2/helper/adiosCUDA.h
@@ -6,8 +6,6 @@
#ifndef ADIOS2_HELPER_ADIOSCUDA_H_
#define ADIOS2_HELPER_ADIOSCUDA_H_

#include <cstddef>

namespace adios2
{
namespace helper
202 changes: 191 additions & 11 deletions testing/adios2/engine/bp/TestBPWriteReadCuda.cpp
@@ -18,20 +18,198 @@ std::string engineName; // comes from command line
const float EPSILON = std::numeric_limits<float>::epsilon();
const float INCREMENT = 10.0f;

void RateCUDA(const std::string mode)
void CUDAWrongMemSpace()
{
const std::string fname("BPWRCUFail.bp");
const size_t Nx = 5;

adios2::ADIOS adios;
std::vector<float> r32s(Nx, .0f);
std::iota(r32s.begin(), r32s.end(), .0f);
{ // write
adios2::IO io = adios.DeclareIO("TestIO");
const adios2::Dims shape{Nx};
const adios2::Dims start{0};
const adios2::Dims count{Nx};
auto var_r32 = io.DefineVariable<float>("r32", shape, start, count);
auto var_r32_cpu =
io.DefineVariable<float>("r32cpu", shape, start, count);

float *gpuSimData = nullptr;
cudaMalloc(&gpuSimData, Nx * sizeof(float));
cudaMemcpy(gpuSimData, (float *)&r32s[0], Nx * sizeof(float),
cudaMemcpyHostToDevice);

io.SetEngine("BP5");
if (!engineName.empty())
{
io.SetEngine(engineName);
}
adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write);

bpWriter.BeginStep();
var_r32.SetMemorySpace(adios2::MemorySpace::Host);
EXPECT_DEATH(bpWriter.Put(var_r32, gpuSimData), "");
var_r32_cpu.SetMemorySpace(adios2::MemorySpace::CUDA);
bpWriter.Put(var_r32_cpu, r32s.data());
bpWriter.EndStep();

bpWriter.Close();
}
{ // read
adios2::IO io = adios.DeclareIO("ReadIO");
io.SetEngine("BP5");
if (!engineName.empty())
{
io.SetEngine(engineName);
}

adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read);

bpReader.BeginStep();
auto var_r32 = io.InquireVariable<float>("r32cpu");
EXPECT_TRUE(var_r32);

std::vector<float> r32o(Nx);
float *gpuSimData;
cudaMalloc(&gpuSimData, Nx * sizeof(float));
var_r32.SetMemorySpace(adios2::MemorySpace::Host);
EXPECT_THROW(bpReader.Get(var_r32, gpuSimData, adios2::Mode::Sync),
std::ios_base::failure);
var_r32.SetMemorySpace(adios2::MemorySpace::CUDA);
EXPECT_THROW(bpReader.Get(var_r32, r32o.data(), adios2::Mode::Sync),
std::ios_base::failure);
// bpReader.EndStep();
// bpReader.Close();
}
}

void CUDADetectMemSpace(const std::string mode)
{
const std::string fname("BPWRCUDetect" + mode + ".bp");
adios2::Mode ioMode = adios2::Mode::Deferred;
if (mode == "Sync")
ioMode = adios2::Mode::Sync;

// Number of rows
const size_t Nx = 5;
// Number of columns
const size_t Ny = 2;
const size_t NTotal = Nx * Ny;
// Number of steps
const size_t NSteps = 10;

adios2::ADIOS adios;
// simulation data
std::vector<float> r32s(NTotal, .0f);
std::iota(r32s.begin(), r32s.end(), .0f);

{ // write
adios2::IO io = adios.DeclareIO("TestIO");
const adios2::Dims shape{Ny, Nx};
const adios2::Dims start{0, 0};
const adios2::Dims count{Ny, Nx};
auto var_r32 = io.DefineVariable<float>("r32", shape, start, count);
EXPECT_TRUE(var_r32);

float *gpuSimData = nullptr;
cudaMalloc(&gpuSimData, NTotal * sizeof(float));
cudaMemcpy(gpuSimData, (float *)&r32s[0], NTotal * sizeof(float),
cudaMemcpyHostToDevice);

io.SetEngine("BP5");

if (!engineName.empty())
{
io.SetEngine(engineName);
}
adios2::Engine bpWriter = io.Open(fname, adios2::Mode::Write);

for (size_t step = 0; step < NSteps; ++step)
{
// Update values in the simulation data
cuda_increment(NTotal, 1, 0, gpuSimData, INCREMENT);
std::transform(r32s.begin(), r32s.end(), r32s.begin(),
std::bind(std::plus<float>(), std::placeholders::_1,
INCREMENT));

bpWriter.BeginStep();
if (step % 2 == 0)
bpWriter.Put(var_r32, gpuSimData, ioMode);
else
bpWriter.Put(var_r32, r32s.data(), ioMode);
bpWriter.EndStep();
}

bpWriter.Close();
}
// reset the initial data
std::iota(r32s.begin(), r32s.end(), .0f);
{ // read
adios2::IO io = adios.DeclareIO("ReadIO");
io.SetEngine("BP5");

if (!engineName.empty())
{
io.SetEngine(engineName);
}

adios2::Engine bpReader = io.Open(fname, adios2::Mode::Read);

unsigned int t = 0;
for (; bpReader.BeginStep() == adios2::StepStatus::OK; ++t)
{
auto var_r32 = io.InquireVariable<float>("r32");
EXPECT_TRUE(var_r32);
ASSERT_EQ(var_r32.ShapeID(), adios2::ShapeID::GlobalArray);
ASSERT_EQ(var_r32.Shape()[0], Ny);
ASSERT_EQ(var_r32.Shape()[1], Nx);

std::vector<float> r32o(NTotal);
float *gpuSimData;
cudaMalloc(&gpuSimData, NTotal * sizeof(float));
if (t % 2 == 0)
{
bpReader.Get(var_r32, r32o.data(), ioMode);
bpReader.EndStep();
}
else
{
bpReader.Get(var_r32, gpuSimData, ioMode);
bpReader.EndStep();
cudaMemcpy(r32o.data(), gpuSimData, NTotal * sizeof(float),
cudaMemcpyDeviceToHost);
}
// Remove INCREMENT from each element
std::transform(r32o.begin(), r32o.end(), r32o.begin(),
std::bind(std::minus<float>(), std::placeholders::_1,
(t + 1) * INCREMENT));

for (size_t i = 0; i < NTotal; i++)
{
char msg[1 << 8] = {0};
snprintf(msg, sizeof(msg), "t=%d i=%zu r32o=%f r32s=%f", t, i,
r32o[i], r32s[i]);
ASSERT_LT(std::abs(r32o[i] - r32s[i]), EPSILON) << msg;
}
}
EXPECT_EQ(t, NSteps);

bpReader.Close();
}
}

void CUDAWriteReadMPI1D(const std::string mode)
{
// Each process would write a 1x8 array and all processes would
// form a mpiSize * Nx 1D array
const std::string fname("BPWRCU1D_" + mode + ".bp");
adios2::Mode ioMode = adios2::Mode::Deferred;
if (mode == "Sync")
ioMode = adios2::Mode::Sync;

// Number of rows
const size_t Nx = 100;

// Number of steps
const size_t NSteps = 1;
const size_t NSteps = 10;

int mpiRank = 0, mpiSize = 1;
#if ADIOS2_USE_MPI
@@ -119,12 +297,11 @@ void RateCUDA(const std::string mode)
auto var_r32 = io.InquireVariable<float>("r32");
EXPECT_TRUE(var_r32);
ASSERT_EQ(var_r32.ShapeID(), adios2::ShapeID::GlobalArray);
ASSERT_EQ(var_r32.Steps(), NSteps);
ASSERT_EQ(var_r32.Shape()[0], NxTotal);

auto mmR32 = std::minmax_element(r32s.begin(), r32s.end());
EXPECT_EQ(var_r32.Min() - INCREMENT, *mmR32.first);
EXPECT_EQ(var_r32.Max() - INCREMENT, *mmR32.second);
EXPECT_EQ(var_r32.Min() - (t + 1) * INCREMENT, *mmR32.first);
EXPECT_EQ(var_r32.Max() - (t + 1) * INCREMENT, *mmR32.second);

std::vector<float> r32o(NxTotal);
float *gpuSimData;
@@ -138,7 +315,7 @@ void RateCUDA(const std::string mode)
// Remove INCREMENT from each element
std::transform(r32o.begin(), r32o.end(), r32o.begin(),
std::bind(std::minus<float>(), std::placeholders::_1,
INCREMENT));
(t + 1) * INCREMENT));

for (size_t i = 0; i < NxTotal; i++)
{
@@ -163,9 +340,12 @@ class BPWRCUDA : public ::testing::TestWithParam<std::string>
virtual void TearDown() {}
};

TEST_P(BPWRCUDA, ADIOS2BPWRCUDA) { RateCUDA(GetParam()); }
TEST_P(BPWRCUDA, ADIOS2BPWRCUDA1D) { CUDAWriteReadMPI1D(GetParam()); }
TEST_P(BPWRCUDA, ADIOS2BPCUDADetect) { CUDADetectMemSpace(GetParam()); }
TEST_P(BPWRCUDA, ADIOS2BPCUDAWrong) { CUDAWrongMemSpace(); }

INSTANTIATE_TEST_SUITE_P(Rate, BPWRCUDA, ::testing::Values("deferred", "sync"));
INSTANTIATE_TEST_SUITE_P(CudaRW, BPWRCUDA,
::testing::Values("deferred", "sync"));

int main(int argc, char **argv)
{
