Skip to content

Commit

Permalink
fix checkpointing with >2GB per process
Browse files Browse the repository at this point in the history
This fixes save/load of fixed and variable checkpointing where
individual ranks write more than 2GBs of data.
Part of dealii#12873 and dealii#12752
  • Loading branch information
tjhei committed Nov 20, 2021
1 parent 108f08a commit 65d8f01
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 32 deletions.
149 changes: 119 additions & 30 deletions source/distributed/tria_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1451,13 +1451,34 @@ namespace parallel
size_header +
static_cast<MPI_Offset>(global_first_cell) * bytes_per_cell;

ierr = MPI_File_write_at(fh,
my_global_file_position,
DEAL_II_MPI_CONST_CAST(src_data_fixed.data()),
src_data_fixed.size(),
MPI_CHAR,
MPI_STATUS_IGNORE);
AssertThrowMPI(ierr);
if (src_data_fixed.size() <=
static_cast<std::size_t>(std::numeric_limits<int>::max()))
{
ierr =
MPI_File_write_at(fh,
my_global_file_position,
DEAL_II_MPI_CONST_CAST(src_data_fixed.data()),
src_data_fixed.size(),
MPI_BYTE,
MPI_STATUS_IGNORE);
AssertThrowMPI(ierr);
}
else
{
// Writes bigger than 2GB require some extra care:
MPI_Datatype bigtype =
Utilities::MPI::create_mpi_data_type_n_bytes(src_data_fixed.size());
ierr =
MPI_File_write_at(fh,
my_global_file_position,
DEAL_II_MPI_CONST_CAST(src_data_fixed.data()),
1,
bigtype,
MPI_STATUS_IGNORE);
AssertThrowMPI(ierr);
ierr = MPI_Type_free(&bigtype);
AssertThrowMPI(ierr);
}

ierr = MPI_File_close(&fh);
AssertThrowMPI(ierr);
Expand Down Expand Up @@ -1497,6 +1518,13 @@ namespace parallel
const MPI_Offset my_global_file_position =
static_cast<MPI_Offset>(global_first_cell) * sizeof(unsigned int);

// It is very unlikely that a single process has more than 2 billion
// cells, but we might as well check.
AssertThrow(src_sizes_variable.size() <
static_cast<std::size_t>(
std::numeric_limits<int>::max()),
ExcNotImplemented());

ierr =
MPI_File_write_at(fh,
my_global_file_position,
Expand Down Expand Up @@ -1525,14 +1553,35 @@ namespace parallel
prefix_sum;

// Write data consecutively into file.
ierr =
MPI_File_write_at(fh,
my_global_file_position,
DEAL_II_MPI_CONST_CAST(src_data_variable.data()),
src_data_variable.size(),
MPI_CHAR,
MPI_STATUS_IGNORE);
AssertThrowMPI(ierr);
if (src_data_variable.size() <=
static_cast<std::size_t>(std::numeric_limits<int>::max()))
{
ierr = MPI_File_write_at(fh,
my_global_file_position,
DEAL_II_MPI_CONST_CAST(
src_data_variable.data()),
src_data_variable.size(),
MPI_BYTE,
MPI_STATUS_IGNORE);
AssertThrowMPI(ierr);
}
else
{
// Writes bigger than 2GB require some extra care:
MPI_Datatype bigtype = Utilities::MPI::create_mpi_data_type_n_bytes(
src_data_variable.size());
ierr = MPI_File_write_at(fh,
my_global_file_position,
DEAL_II_MPI_CONST_CAST(
src_data_variable.data()),
1,
bigtype,
MPI_STATUS_IGNORE);
AssertThrowMPI(ierr);
ierr = MPI_Type_free(&bigtype);
AssertThrowMPI(ierr);
}


ierr = MPI_File_close(&fh);
AssertThrowMPI(ierr);
Expand Down Expand Up @@ -1618,13 +1667,32 @@ namespace parallel
size_header +
static_cast<MPI_Offset>(global_first_cell) * bytes_per_cell;

ierr = MPI_File_read_at(fh,
my_global_file_position,
dest_data_fixed.data(),
dest_data_fixed.size(), // local buffer
MPI_CHAR,
MPI_STATUS_IGNORE);
AssertThrowMPI(ierr);
if (dest_data_fixed.size() <=
static_cast<std::size_t>(std::numeric_limits<int>::max()))
{
ierr = MPI_File_read_at(fh,
my_global_file_position,
dest_data_fixed.data(),
dest_data_fixed.size(),
MPI_BYTE,
MPI_STATUS_IGNORE);
AssertThrowMPI(ierr);
}
else
{
// Reads bigger than 2GB require some extra care:
MPI_Datatype bigtype = Utilities::MPI::create_mpi_data_type_n_bytes(
dest_data_fixed.size());
ierr = MPI_File_read_at(fh,
my_global_file_position,
dest_data_fixed.data(),
1,
bigtype,
MPI_STATUS_IGNORE);
AssertThrowMPI(ierr);
ierr = MPI_Type_free(&bigtype);
AssertThrowMPI(ierr);
}

ierr = MPI_File_close(&fh);
AssertThrowMPI(ierr);
Expand Down Expand Up @@ -1673,7 +1741,7 @@ namespace parallel
const std::uint64_t size_on_proc =
std::accumulate(dest_sizes_variable.begin(),
dest_sizes_variable.end(),
0);
0ULL);

std::uint64_t prefix_sum = 0;
ierr = MPI_Exscan(DEAL_II_MPI_CONST_CAST(&size_on_proc),
Expand All @@ -1689,13 +1757,34 @@ namespace parallel
prefix_sum;

dest_data_variable.resize(size_on_proc);
ierr = MPI_File_read_at(fh,
my_global_file_position,
dest_data_variable.data(),
dest_data_variable.size(),
MPI_CHAR,
MPI_STATUS_IGNORE);
AssertThrowMPI(ierr);

if (dest_data_variable.size() <=
static_cast<std::size_t>(std::numeric_limits<int>::max()))
{
ierr = MPI_File_read_at(fh,
my_global_file_position,
dest_data_variable.data(),
dest_data_variable.size(),
MPI_BYTE,
MPI_STATUS_IGNORE);
AssertThrowMPI(ierr);
}
else
{
// Reads bigger than 2GB require some extra care:
MPI_Datatype bigtype = Utilities::MPI::create_mpi_data_type_n_bytes(
src_data_fixed.size());
ierr = MPI_File_read_at(fh,
my_global_file_position,
dest_data_variable.data(),
1,
bigtype,
MPI_STATUS_IGNORE);
AssertThrowMPI(ierr);
ierr = MPI_Type_free(&bigtype);
AssertThrowMPI(ierr);
}


ierr = MPI_File_close(&fh);
AssertThrowMPI(ierr);
Expand Down
8 changes: 6 additions & 2 deletions tests/distributed_grids/checkpointing_02.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@
// Test (de)serialization of Trilinos vectors with checkpointing files >4GB. The
// test here is of course simplified to run quickly.

// set to true to run a test that generates a 5GB file:
// Set this to true to run a test that generates an 8GB file. Run with
// 5 MPI ranks to check correctness with a total file size above 4GB.
// Run with 2 MPI ranks to test a single process above 2GB. Both cases were
// broken before this test was made. Warning, you probably need in the
// order of 32GB of RAM to run this.
const bool big = false;

#include <deal.II/base/conditional_ostream.h>
Expand Down Expand Up @@ -156,7 +160,7 @@ LaplaceProblem<dim>::run(unsigned int n_cycles_global,

setup_system();

const unsigned int n_vectors = (big) ? 50 : 2;
const unsigned int n_vectors = (big) ? 70 : 2;
{
deallog << "checkpointing..." << std::endl;
std::vector<VectorType> vectors(n_vectors);
Expand Down

0 comments on commit 65d8f01

Please sign in to comment.