Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BP5 read direct to application memory (1 dimensional case) #3387

Merged
merged 1 commit into from
Nov 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 93 additions & 21 deletions source/adios2/toolkit/format/bp5/BP5Deserializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1219,6 +1219,7 @@ bool BP5Deserializer::QueueGetSingle(core::VariableBase &variable,
Req.Count = variable.m_Count;
}
Req.Data = DestData;
Req.MemSpace = MemSpace;
eisenhauer marked this conversation as resolved.
Show resolved Hide resolved
Req.Step = Step;
PendingRequests.push_back(Req);
}
Expand Down Expand Up @@ -1298,6 +1299,26 @@ static size_t CalcBlockLength(const size_t dimensionsSize, const size_t *count)
return len;
}

/*
* Return true if, for Req and the data source info given by offsets and
* count, this is a transfer of a contiguous block of memory into another
* contiguous block of memory. In its current usage, it's OK if this
* function returns false in circumstances where the data is really
* contiguous, but it should never return true when it is not
* contiguous.
*/
bool BP5Deserializer::IsContiguousTransfer(BP5ArrayRequest *Req,
                                           size_t *offsets, size_t *count)
{
    /*
     * The offsets and count arguments are accepted but not consulted
     * here; the answer depends only on the dimensionality recorded for
     * the requested variable.
     *
     * One-dimensional requests in ADIOS always move a contiguous block.
     * Multidimensional requests might or might not be contiguous, so
     * for now they are all conservatively reported as non-contiguous.
     */
    const auto dimCount = Req->VarRec->DimCount;
    if (dimCount != 1)
    {
        return false;
    }
    return true;
}

std::vector<BP5Deserializer::ReadRequest>
BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers,
size_t *maxReadSize)
Expand All @@ -1308,6 +1329,7 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers,
for (size_t ReqIndex = 0; ReqIndex < PendingRequests.size(); ReqIndex++)
{
auto Req = &PendingRequests[ReqIndex];
VariableBase *VB = static_cast<VariableBase *>(Req->VarRec->Variable);
if (Req->RequestType == Local)
{
const size_t writerCohortSize = WriterCohortSize(Req->Step);
Expand Down Expand Up @@ -1336,20 +1358,41 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers,
RR.StartOffset =
writer_meta_base->DataBlockLocation[NeededBlock];

RR.DirectToAppMemory = IsContiguousTransfer(
Req, &writer_meta_base->Offsets[StartDim],
&writer_meta_base->Count[StartDim]);
RR.ReadLength =
helper::GetDataTypeSize(Req->VarRec->Type) *
CalcBlockLength(Req->VarRec->DimCount,
&writer_meta_base->Count[StartDim]);
RR.DestinationAddr = nullptr;
if (doAllocTempBuffers)
RR.OffsetInBlock = 0;
if (RR.DirectToAppMemory)
{
RR.DestinationAddr = (char *)malloc(RR.ReadLength);
RR.DestinationAddr = (char *)Req->Data;
if (Req->Start.size() != 0)
{
RR.ReadLength =
helper::GetDataTypeSize(Req->VarRec->Type) *
CalcBlockLength(Req->VarRec->DimCount,
Req->Count.data());
/* DirectToAppMemory handles only 1D, so offset calc
* is 1D only for the moment */
RR.StartOffset +=
helper::GetDataTypeSize(Req->VarRec->Type) *
Req->Start[0];
}
}
else
{
RR.DestinationAddr = nullptr;
if (doAllocTempBuffers)
{
RR.DestinationAddr = (char *)malloc(RR.ReadLength);
}
*maxReadSize =
(*maxReadSize < RR.ReadLength ? RR.ReadLength
: *maxReadSize);
}
*maxReadSize =
(*maxReadSize < RR.ReadLength ? RR.ReadLength
: *maxReadSize);
RR.Internal = NULL;
RR.OffsetInBlock = 0;
RR.ReqIndex = ReqIndex;
RR.BlockID = NeededBlock;
Ret.push_back(RR);
Expand All @@ -1375,6 +1418,7 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers,
Block++)
{
std::array<size_t, helper::MAX_DIMS> intersectionstart;
std::array<size_t, helper::MAX_DIMS> intersectionend;
std::array<size_t, helper::MAX_DIMS> intersectioncount;

size_t StartDim = Block * Req->VarRec->DimCount;
Expand Down Expand Up @@ -1404,16 +1448,14 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers,
*maxReadSize =
(*maxReadSize < RR.ReadLength ? RR.ReadLength
: *maxReadSize);
RR.Internal = NULL;
RR.DirectToAppMemory = false;
RR.ReqIndex = ReqIndex;
RR.BlockID = Block;
RR.OffsetInBlock = 0;
Ret.push_back(RR);
}
else
{
VariableBase *VB = static_cast<VariableBase *>(
Req->VarRec->Variable);
for (size_t Dim = 0; Dim < Req->VarRec->DimCount;
Dim++)
{
Expand All @@ -1429,14 +1471,15 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers,
for (size_t Dim = 0; Dim < Req->VarRec->DimCount;
Dim++)
{
intersectionstart[Dim] +=
intersectioncount[Dim] - 1;
intersectionend[Dim] = intersectionstart[Dim] +
intersectioncount[Dim] -
1;
}
size_t EndOffsetInBlock =
VB->m_ElementSize *
(LinearIndex(Req->VarRec->DimCount,
&writer_meta_base->Count[StartDim],
&intersectionstart[0],
&intersectionend[0],
m_ReaderIsRowMajor) +
1);
ReadRequest RR;
Expand All @@ -1447,16 +1490,40 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers,
StartOffsetInBlock;
RR.ReadLength =
EndOffsetInBlock - StartOffsetInBlock;
RR.DestinationAddr = nullptr;
if (doAllocTempBuffers)
RR.DirectToAppMemory = IsContiguousTransfer(
Req, &writer_meta_base->Offsets[StartDim],
&writer_meta_base->Count[StartDim]);
if (RR.DirectToAppMemory)
{
/*
* DirectToAppMemory handles only 1D, so offset
* calc is 1D only for the moment. ContigOffset
* handles the case where our destination is not
* the start of the destination memory (because
* some other block filled in that start)
*/

ssize_t ContigOffset =
(writer_meta_base->Offsets[StartDim + 0] -
Req->Start[0]) *
VB->m_ElementSize;
if (ContigOffset < 0)
ContigOffset = 0;
RR.DestinationAddr =
(char *)malloc(RR.ReadLength);
(char *)Req->Data + ContigOffset;
}
else
{
RR.DestinationAddr = nullptr;
if (doAllocTempBuffers)
{
RR.DestinationAddr =
(char *)malloc(RR.ReadLength);
}
*maxReadSize = (*maxReadSize < RR.ReadLength
? RR.ReadLength
: *maxReadSize);
}
*maxReadSize =
(*maxReadSize < RR.ReadLength ? RR.ReadLength
: *maxReadSize);
RR.Internal = NULL;
RR.OffsetInBlock = StartOffsetInBlock;
RR.ReqIndex = ReqIndex;
RR.BlockID = Block;
Expand All @@ -1473,6 +1540,11 @@ BP5Deserializer::GenerateReadRequests(const bool doAllocTempBuffers,
void BP5Deserializer::FinalizeGet(const ReadRequest &Read, const bool freeAddr)
{
auto Req = PendingRequests[Read.ReqIndex];

// if we could do this, nothing else to do
if (Read.DirectToAppMemory)
return;

int ElementSize = Req.VarRec->ElementSize;
MetaArrayRec *writer_meta_base =
(MetaArrayRec *)GetMetadataBase(Req.VarRec, Req.Step, Read.WriterRank);
Expand Down
5 changes: 4 additions & 1 deletion source/adios2/toolkit/format/bp5/BP5Deserializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class BP5Deserializer : virtual public BP5Base
size_t StartOffset;
size_t ReadLength;
char *DestinationAddr;
void *Internal;
bool DirectToAppMemory;
size_t ReqIndex;
size_t OffsetInBlock;
size_t BlockID;
Expand Down Expand Up @@ -230,6 +230,9 @@ class BP5Deserializer : virtual public BP5Base
std::vector<BP5ArrayRequest> PendingRequests;
void *GetMetadataBase(BP5VarRec *VarRec, size_t Step,
size_t WriterRank) const;
bool IsContiguousTransfer(BP5ArrayRequest *Req, size_t *offsets,
size_t *count);

size_t CurTimestep = 0;

/* We assume operators are not thread-safe, call Decompress() one at a time
Expand Down