From d0412e0512c08f764cfbfb76a300ae2e3fde78dc Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Tue, 18 May 2021 13:15:57 -0400 Subject: [PATCH 1/8] serializer and file engine --- source/adios2/CMakeLists.txt | 13 +- source/adios2/core/IO.cpp | 4 + source/adios2/engine/bp5/BP5Engine.cpp | 188 + source/adios2/engine/bp5/BP5Engine.h | 141 + source/adios2/engine/bp5/BP5Reader.cpp | 664 +++ source/adios2/engine/bp5/BP5Reader.h | 209 + source/adios2/engine/bp5/BP5Reader.tcc | 43 + source/adios2/engine/bp5/BP5Writer.cpp | 566 +++ source/adios2/engine/bp5/BP5Writer.h | 173 + source/adios2/engine/bp5/BP5Writer.tcc | 53 + source/adios2/engine/sst/SstParamParser.cpp | 4 + source/adios2/engine/sst/SstReader.cpp | 107 +- source/adios2/engine/sst/SstReader.h | 4 + source/adios2/engine/sst/SstWriter.cpp | 85 +- source/adios2/engine/sst/SstWriter.h | 13 +- source/adios2/engine/sst/SstWriter.tcc | 19 +- source/adios2/toolkit/format/bp5/BP5Base.cpp | 58 + source/adios2/toolkit/format/bp5/BP5Base.h | 62 + .../toolkit/format/bp5/BP5Deserializer.cpp | 1022 ++++ .../toolkit/format/bp5/BP5Deserializer.h | 210 + .../toolkit/format/bp5/BP5Deserializer.tcc | 89 + .../toolkit/format/bp5/BP5Serializer.cpp | 819 +++ .../adios2/toolkit/format/bp5/BP5Serializer.h | 157 + .../adios2/toolkit/format/buffer/BufferV.cpp | 81 + source/adios2/toolkit/format/buffer/BufferV.h | 59 + .../toolkit/format/buffer/ffs/BufferFFS.cpp | 33 + .../toolkit/format/buffer/ffs/BufferFFS.h | 42 + .../toolkit/format/buffer/ffs/BufferSTL.tcc | 57 + source/adios2/toolkit/sst/cp/cp_common.c | 2 +- source/adios2/toolkit/sst/cp/cp_internal.h | 5 + source/adios2/toolkit/sst/cp/cp_reader.c | 4414 +++++++++-------- source/adios2/toolkit/sst/cp/cp_writer.c | 31 + source/adios2/toolkit/sst/sst.h | 15 +- source/adios2/toolkit/sst/sst_data.h | 8 + testing/adios2/engine/bp/CMakeLists.txt | 5 + .../engine/staging-common/CMakeLists.txt | 15 + .../engine/staging-common/run_test.py.gen.in | 1 + 37 files changed, 7310 insertions(+), 
2161 deletions(-) create mode 100644 source/adios2/engine/bp5/BP5Engine.cpp create mode 100644 source/adios2/engine/bp5/BP5Engine.h create mode 100644 source/adios2/engine/bp5/BP5Reader.cpp create mode 100644 source/adios2/engine/bp5/BP5Reader.h create mode 100644 source/adios2/engine/bp5/BP5Reader.tcc create mode 100644 source/adios2/engine/bp5/BP5Writer.cpp create mode 100644 source/adios2/engine/bp5/BP5Writer.h create mode 100644 source/adios2/engine/bp5/BP5Writer.tcc create mode 100644 source/adios2/toolkit/format/bp5/BP5Base.cpp create mode 100644 source/adios2/toolkit/format/bp5/BP5Base.h create mode 100644 source/adios2/toolkit/format/bp5/BP5Deserializer.cpp create mode 100644 source/adios2/toolkit/format/bp5/BP5Deserializer.h create mode 100644 source/adios2/toolkit/format/bp5/BP5Deserializer.tcc create mode 100644 source/adios2/toolkit/format/bp5/BP5Serializer.cpp create mode 100644 source/adios2/toolkit/format/bp5/BP5Serializer.h create mode 100644 source/adios2/toolkit/format/buffer/BufferV.cpp create mode 100644 source/adios2/toolkit/format/buffer/BufferV.h create mode 100644 source/adios2/toolkit/format/buffer/ffs/BufferFFS.cpp create mode 100644 source/adios2/toolkit/format/buffer/ffs/BufferFFS.h create mode 100644 source/adios2/toolkit/format/buffer/ffs/BufferSTL.tcc diff --git a/source/adios2/CMakeLists.txt b/source/adios2/CMakeLists.txt index bdd3ba6a02..f96498fcf7 100644 --- a/source/adios2/CMakeLists.txt +++ b/source/adios2/CMakeLists.txt @@ -56,6 +56,10 @@ add_library(adios2_core engine/bp4/BP4Reader.cpp engine/bp4/BP4Reader.tcc engine/bp4/BP4Writer.cpp engine/bp4/BP4Writer.tcc + engine/bp5/BP5Engine.cpp + engine/bp5/BP5Reader.cpp engine/bp5/BP5Reader.tcc + engine/bp5/BP5Writer.cpp engine/bp5/BP5Writer.tcc + engine/skeleton/SkeletonReader.cpp engine/skeleton/SkeletonReader.tcc engine/skeleton/SkeletonWriter.cpp engine/skeleton/SkeletonWriter.tcc @@ -67,7 +71,9 @@ add_library(adios2_core engine/nullcore/NullCoreWriter.cpp 
engine/nullcore/NullCoreWriter.tcc #toolkit toolkit/format/buffer/Buffer.cpp + toolkit/format/buffer/BufferV.cpp toolkit/format/buffer/heap/BufferSTL.cpp + toolkit/format/buffer/ffs/BufferFFS.cpp toolkit/format/bp/BPBase.cpp toolkit/format/bp/BPBase.tcc toolkit/format/bp/BPSerializer.cpp toolkit/format/bp/BPSerializer.tcc @@ -120,7 +126,7 @@ target_include_directories(adios2_core $ $ ) -target_link_libraries(adios2_core PRIVATE adios2sys_interface adios2::thirdparty::pugixml taustubs adios2::thirdparty::yaml-cpp) +target_link_libraries(adios2_core PRIVATE adios2sys_interface adios2::thirdparty::pugixml taustubs adios2::thirdparty::yaml-cpp ffs::ffs) target_link_libraries(adios2_core PUBLIC ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(adios2_core PUBLIC "$") @@ -210,6 +216,11 @@ endif() if(ADIOS2_HAVE_SST) add_subdirectory(toolkit/sst) target_sources(adios2_core PRIVATE + + toolkit/format/bp5/BP5Base.cpp + toolkit/format/bp5/BP5Serializer.cpp + toolkit/format/bp5/BP5Deserializer.cpp toolkit/format/bp5/BP5Deserializer.tcc + engine/sst/SstReader.cpp engine/sst/SstWriter.cpp engine/sst/SstParamParser.cpp diff --git a/source/adios2/core/IO.cpp b/source/adios2/core/IO.cpp index 369529dfeb..41f087e586 100644 --- a/source/adios2/core/IO.cpp +++ b/source/adios2/core/IO.cpp @@ -22,6 +22,8 @@ #include "adios2/engine/bp3/BP3Writer.h" #include "adios2/engine/bp4/BP4Reader.h" #include "adios2/engine/bp4/BP4Writer.h" +#include "adios2/engine/bp5/BP5Reader.h" +#include "adios2/engine/bp5/BP5Writer.h" #include "adios2/engine/inline/InlineReader.h" #include "adios2/engine/inline/InlineWriter.h" #include "adios2/engine/null/NullEngine.h" @@ -65,6 +67,8 @@ std::unordered_map Factory = { {IO::MakeEngine, IO::MakeEngine}}, {"bp4", {IO::MakeEngine, IO::MakeEngine}}, + {"bp5", + {IO::MakeEngine, IO::MakeEngine}}, {"hdfmixer", #ifdef ADIOS2_HAVE_HDF5 IO_MakeEngine_HDFMixer() diff --git a/source/adios2/engine/bp5/BP5Engine.cpp b/source/adios2/engine/bp5/BP5Engine.cpp new file mode 
100644 index 0000000000..1fe9364918 --- /dev/null +++ b/source/adios2/engine/bp5/BP5Engine.cpp @@ -0,0 +1,188 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. + * + * BP5Engine.cpp + * + */ + +#include "BP5Engine.h" + +#include "adios2/common/ADIOSMacros.h" +#include "adios2/common/ADIOSTypes.h" //PathSeparator +#include "adios2/core/IO.h" +#include "adios2/helper/adiosFunctions.h" //CreateDirectory, StringToTimeUnit, + +#include +#include + +namespace adios2 +{ +namespace core +{ +namespace engine +{ + +std::vector +BP5Engine::GetBPMetadataFileNames(const std::vector &names) const + noexcept +{ + std::vector metadataFileNames; + metadataFileNames.reserve(names.size()); + for (const auto &name : names) + { + metadataFileNames.push_back(GetBPMetadataFileName(name)); + } + return metadataFileNames; +} + +std::vector BP5Engine::GetBPMetaMetadataFileNames( + const std::vector &names) const noexcept +{ + std::vector metaMetadataFileNames; + metaMetadataFileNames.reserve(names.size()); + for (const auto &name : names) + { + metaMetadataFileNames.push_back(GetBPMetaMetadataFileName(name)); + } + return metaMetadataFileNames; +} + +std::string BP5Engine::GetBPMetadataFileName(const std::string &name) const + noexcept +{ + const std::string bpName = helper::RemoveTrailingSlash(name); + const size_t index = 0; // global metadata file is generated by rank 0 + /* the name of the metadata file is "md.0" */ + const std::string bpMetaDataRankName(bpName + PathSeparator + "md." + + std::to_string(index)); + return bpMetaDataRankName; +} + +std::string BP5Engine::GetBPMetaMetadataFileName(const std::string &name) const + noexcept +{ + const std::string bpName = helper::RemoveTrailingSlash(name); + const size_t index = 0; // global metadata file is generated by rank 0 + /* the name of the metametadata file is "mmd.0" */ + const std::string bpMetaMetaDataRankName(bpName + PathSeparator + "mmd." 
+ + std::to_string(index)); + return bpMetaMetaDataRankName; +} + +std::vector BP5Engine::GetBPMetadataIndexFileNames( + const std::vector &names) const noexcept +{ + std::vector metadataIndexFileNames; + metadataIndexFileNames.reserve(names.size()); + for (const auto &name : names) + { + metadataIndexFileNames.push_back(GetBPMetadataIndexFileName(name)); + } + return metadataIndexFileNames; +} + +std::string BP5Engine::GetBPMetadataIndexFileName(const std::string &name) const + noexcept +{ + const std::string bpName = helper::RemoveTrailingSlash(name); + /* the name of the metadata index file is "md.idx" */ + const std::string bpMetaDataIndexRankName(bpName + PathSeparator + + "md.idx"); + return bpMetaDataIndexRankName; +} + +std::string BP5Engine::GetBPSubStreamName(const std::string &name, + const size_t id, + const bool hasSubFiles, + const bool isReader) const noexcept +{ + if (!hasSubFiles) + { + return name; + } + + const std::string bpName = helper::RemoveTrailingSlash(name); + + const size_t index = id; + // isReader ? id + // : m_Aggregator.m_IsActive ? m_Aggregator.m_SubStreamIndex : id; + + /* the name of a data file starts with "data." */ + const std::string bpRankName(bpName + PathSeparator + "data." 
+ + std::to_string(index)); + return bpRankName; +} + +std::vector +BP5Engine::GetBPSubStreamNames(const std::vector &names) const + noexcept +{ + std::vector bpNames; + bpNames.reserve(names.size()); + + for (const auto &name : names) + { + bpNames.push_back( + GetBPSubStreamName(name, static_cast(m_RankMPI))); + } + return bpNames; +} + +void BP5Engine::ParseParams(IO &io, struct BP5Params &Params) +{ + // Params is not memset: it contains a std::string (BurstBufferPath), and get_params below assigns every field its default before applying any user override from io.m_Parameters. + + auto lf_SetBoolParameter = [&](const std::string key, bool &parameter) { + auto itKey = io.m_Parameters.find(key); + if (itKey != io.m_Parameters.end()) + { + std::string value = itKey->second; + std::transform(value.begin(), value.end(), value.begin(), + ::tolower); + if (value == "yes" || value == "true" || value == "on") + { + parameter = true; + } + else if (value == "no" || value == "false" || value == "off") + { + parameter = false; + } + else + { + throw std::invalid_argument( + "ERROR: Unknown BP5 Boolean parameter \"" + value + "\""); + } + } + }; + auto lf_SetIntParameter = [&](const std::string key, int &parameter) { + auto itKey = io.m_Parameters.find(key); + if (itKey != io.m_Parameters.end()) + { + parameter = std::stoi(itKey->second); + return true; + } + return false; + }; + + auto lf_SetStringParameter = [&](const std::string key, + std::string &parameter) { + auto itKey = io.m_Parameters.find(key); + if (itKey != io.m_Parameters.end()) + { + parameter = itKey->second; + return true; + } + return false; + }; + +#define get_params(Param, Type, Typedecl, Default) \ + Params.Param = Default; \ + lf_Set##Type##Parameter(#Param, Params.Param); + BP5_FOREACH_PARAMETER_TYPE_4ARGS(get_params); +#undef get_params +} + +} // namespace engine +} // namespace core +} // namespace adios2 diff --git a/source/adios2/engine/bp5/BP5Engine.h b/source/adios2/engine/bp5/BP5Engine.h new file mode 100644 index 0000000000..5e276d207c --- /dev/null +++ b/source/adios2/engine/bp5/BP5Engine.h @@ -0,0 +1,141 @@ +/* + * Distributed under the 
OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. + * + * BP5Engine.h + * + */ + +#ifndef ADIOS2_ENGINE_BP5_BP5ENGINE_H_ +#define ADIOS2_ENGINE_BP5_BP5ENGINE_H_ + +#include "adios2/common/ADIOSConfig.h" +#include "adios2/core/Engine.h" +#include "adios2/helper/adiosComm.h" +#include "adios2/toolkit/burstbuffer/FileDrainerSingleThread.h" +#include "adios2/toolkit/format/bp5/BP5Serializer.h" +#include "adios2/toolkit/transportman/TransportMan.h" + +namespace adios2 +{ +namespace core +{ +namespace engine +{ + +class BP5Engine +{ +public: + int m_RankMPI = 0; + /* metadata index table*/ + std::unordered_map> m_MetadataIndexTable; + + struct Minifooter + { + std::string VersionTag; + uint64_t PGIndexStart = 0; + uint64_t VarsIndexStart = 0; + uint64_t AttributesIndexStart = 0; + int8_t Version = -1; + bool IsLittleEndian = true; + bool HasSubFiles = false; + }; + + format::BufferSTL m_MetadataIndex; + + /** Positions of flags in Index Table Header that Reader uses */ + static constexpr size_t m_IndexHeaderSize = 64; + static constexpr size_t m_EndianFlagPosition = 36; + static constexpr size_t m_BPVersionPosition = 37; + static constexpr size_t m_ActiveFlagPosition = 38; + static constexpr size_t m_BPMinorVersionPosition = 39; + static constexpr size_t m_WriterCountPosition = 40; + static constexpr size_t m_AggregatorCountPosition = 44; + static constexpr size_t m_ColumnMajorFlagPosition = 48; + static constexpr size_t m_VersionTagPosition = 0; + static constexpr size_t m_VersionTagLength = 32; + + std::vector + GetBPSubStreamNames(const std::vector &names) const noexcept; + + std::vector + GetBPMetadataFileNames(const std::vector &names) const + noexcept; + std::vector + GetBPMetaMetadataFileNames(const std::vector &names) const + noexcept; + std::string GetBPMetadataFileName(const std::string &name) const noexcept; + std::string GetBPMetaMetadataFileName(const std::string &name) const + noexcept; + std::vector + 
GetBPMetadataIndexFileNames(const std::vector &names) const + noexcept; + + std::string GetBPMetadataIndexFileName(const std::string &name) const + noexcept; + + std::string GetBPSubStreamName(const std::string &name, const size_t id, + const bool hasSubFiles = true, + const bool isReader = false) const noexcept; + +#define BP5_FOREACH_PARAMETER_TYPE_4ARGS(MACRO) \ + MACRO(OpenTimeoutSecs, Int, int, 3600) \ + MACRO(BeginStepPollingFrequencySecs, Int, int, 0) \ + MACRO(StreamReader, Bool, bool, false) \ + MACRO(BurstBufferDrain, Bool, bool, true) \ + MACRO(NodeLocal, Bool, bool, false) \ + MACRO(BurstBufferPath, String, std::string, "\"\"") \ + MACRO(verbose, Int, int, 0) \ + MACRO(CollectiveMetadata, Bool, bool, true) \ + MACRO(ReaderShortCircuitReads, Bool, bool, false) + + struct BP5Params + { +#define declare_struct(Param, Type, Typedecl, Default) Typedecl Param; + BP5_FOREACH_PARAMETER_TYPE_4ARGS(declare_struct) +#undef declare_struct + }; + + void ParseParams(IO &io, BP5Params &Params); + BP5Params m_Parameters; + +private: +}; + +} // namespace engine +} // namespace core +} // namespace adios2 +#endif + +/* + * Data Formats: + * MetadataIndex file (md.idx) + * BP5 header for "Index Table" (64 bytes) + * for each Writer, what aggregator writes its data + * uint16_t * WriterCount; + * for each timestep: + * uint64_t 0 : CombinedMetaDataPos + * uint64_t 1 : CombinedMetaDataSize + * for each Writer + * uint64_t DataPos (in the file above) + * + * MetaMetadata file (mmd.0) contains FFS format information + * for each meta metadata item: + * uint64_t MetaMetaIDLen + * uint64_t MetaMetaInfoLen + * char[MetaMetaIDLen] MetaMetaID + * char[MetaMetaInfoLen] MetaMetaInfo + * Notes: This file should be quite small, with size dependent upon the + *number of different "formats" written by any rank. 
+ * + * + * MetaData file (md.0) contains encoded metadata for each timestep, for each + *rank BP5 header for "Metadata" (64 bytes) for each timestep: uint64_t + *TotalSize of this metadata block (including this length) uint64_t[WriterCount] + *Length of each writer rank's metadata for each rank FFS-encoded metadata block + *of length corresponding to entry above + * + * + * Data file (data.x) contains a block of data for each timestep, for each + *rank + */ diff --git a/source/adios2/engine/bp5/BP5Reader.cpp b/source/adios2/engine/bp5/BP5Reader.cpp new file mode 100644 index 0000000000..1e9b917664 --- /dev/null +++ b/source/adios2/engine/bp5/BP5Reader.cpp @@ -0,0 +1,664 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. + * + * BP5Reader.cpp + * + * Created on: Aug 1, 2018 + * Author: Lipeng Wan wanl@ornl.gov + */ + +#include "BP5Reader.h" +#include "BP5Reader.tcc" + +#include "adios2/toolkit/profiling/taustubs/tautimer.hpp" + +#include +#include + +namespace adios2 +{ +namespace core +{ +namespace engine +{ + +BP5Reader::BP5Reader(IO &io, const std::string &name, const Mode mode, + helper::Comm comm) +: Engine("BP5Reader", io, name, mode, std::move(comm)), m_MDFileManager(m_Comm), + m_FileMetaMetadataManager(m_Comm), m_DataFileManager(m_Comm), + m_MDIndexFileManager(m_Comm), m_ActiveFlagFileManager(m_Comm) +{ + TAU_SCOPED_TIMER("BP5Reader::Open"); + Init(); +} + +StepStatus BP5Reader::BeginStep(StepMode mode, const float timeoutSeconds) +{ + TAU_SCOPED_TIMER("BP5Reader::BeginStep"); + if (mode != StepMode::Read) + { + throw std::invalid_argument("ERROR: mode is not supported yet, " + "only Read is valid for " + "engine BP5Reader, in call to " + "BeginStep\n"); + } + + m_IO.m_ReadStreaming = false; // can't do those checks + StepStatus status = StepStatus::OK; + if (m_FirstStep) + { + if (m_StepsCount == 0) + { + // status = CheckForNewSteps(Seconds(timeoutSeconds)); + } + } + else + { + 
if (m_CurrentStep + 1 >= m_StepsCount) + { + // status = CheckForNewSteps(Seconds(timeoutSeconds)); + status = StepStatus::EndOfStream; + } + } + if (status == StepStatus::OK) + { + if (m_FirstStep) + { + m_FirstStep = false; + } + else + { + ++m_CurrentStep; + } + + m_IO.m_EngineStep = m_CurrentStep; + // SstBlock AttributeBlockList = + // SstGetAttributeData(m_Input, SstCurrentStep(m_Input)); + // i = 0; + // while (AttributeBlockList && AttributeBlockList[i].BlockData) + // { + // m_IO.RemoveAllAttributes(); + // m_BP5Deserializer->InstallAttributeData( + // AttributeBlockList[i].BlockData, + // AttributeBlockList[i].BlockSize); + // i++; + // } + + m_IO.RemoveAllVariables(); + m_BP5Deserializer->SetupForTimestep(m_CurrentStep); + + size_t pgstart = m_MetadataIndexTable[m_CurrentStep][0]; + size_t Position = pgstart + sizeof(uint64_t); // skip total data size + size_t MDPosition = Position + sizeof(uint64_t) * m_WriterCount; + for (int i = 0; i < m_WriterCount; i++) + { + size_t ThisMDSize = helper::ReadValue( + m_Metadata.m_Buffer, Position, m_Minifooter.IsLittleEndian); + char *ThisMD = m_Metadata.m_Buffer.data() + MDPosition; + m_BP5Deserializer->InstallMetaData(ThisMD, ThisMDSize, i); + MDPosition += ThisMDSize; + } + + m_IO.ResetVariablesStepSelection(false, + "in call to BP5 Reader BeginStep"); + + // caches attributes for each step + // if a variable name is a prefix + // e.g. 
var prefix = {var/v1, var/v2, var/v3} + m_IO.SetPrefixedNames(true); + } + + return status; +} + +size_t BP5Reader::CurrentStep() const { return m_CurrentStep; } + +void BP5Reader::EndStep() +{ + TAU_SCOPED_TIMER("BP5Reader::EndStep"); + PerformGets(); +} + +void BP5Reader::ReadData(const size_t WriterRank, const size_t Timestep, + const size_t StartOffset, const size_t Length, + char *Destination) +{ + size_t DataStartPos = m_MetadataIndexTable[Timestep][2]; + std::cout << "DataOffsetsStart in MDatafile is " << DataStartPos + << std::endl; + DataStartPos += WriterRank * sizeof(uint64_t); + std::cout << "DataOffsetsStart after addition is " << DataStartPos + << std::endl; + size_t DataStart = helper::ReadValue( + m_MetadataIndex.m_Buffer, DataStartPos, m_Minifooter.IsLittleEndian); + std::cout << "Data start for timestep " << Timestep << " Rank " + << WriterRank << " is " << std::hex << DataStart << std::dec + << std::endl; + // check if subfile is already opened + if (m_DataFileManager.m_Transports.count(WriterRank) == 0) + { + const std::string subFileName = GetBPSubStreamName( + m_Name, WriterRank, m_Minifooter.HasSubFiles, true); + + m_DataFileManager.OpenFileID(subFileName, WriterRank, Mode::Read, + {{"transport", "File"}}, false); + } + m_DataFileManager.ReadFile(Destination, Length, DataStart + StartOffset, + WriterRank); +} + +void BP5Reader::PerformGets() +{ + TAU_SCOPED_TIMER("BP5Reader::PerformGets"); + auto ReadRequests = m_BP5Deserializer->GenerateReadRequests(); + // Potentially optimize read requests, make contiguous, etc. 
+ for (const auto &Req : ReadRequests) + { + ReadData(Req.WriterRank, Req.Timestep, Req.StartOffset, Req.ReadLength, + Req.DestinationAddr); + } + + m_BP5Deserializer->FinalizeGets(ReadRequests); +} + +// PRIVATE +void BP5Reader::Init() +{ + if (m_OpenMode != Mode::Read) + { + throw std::invalid_argument("ERROR: BPFileReader only " + "supports OpenMode::Read from" + + m_Name + " " + m_EndMessage); + } + + ParseParams(m_IO, m_Parameters); + m_ReaderIsRowMajor = helper::IsRowMajor(m_IO.m_HostLanguage); + InitTransports(); + + /* Do a collective wait for the file(s) to appear within timeout. + Make sure every process comes to the same conclusion */ + const Seconds timeoutSeconds = Seconds(m_Parameters.OpenTimeoutSecs); + + Seconds pollSeconds = Seconds(m_Parameters.BeginStepPollingFrequencySecs); + if (pollSeconds > timeoutSeconds) + { + pollSeconds = timeoutSeconds; + } + + TimePoint timeoutInstant = + std::chrono::steady_clock::now() + timeoutSeconds; + + OpenFiles(timeoutInstant, pollSeconds, timeoutSeconds); + if (!m_Parameters.StreamReader) + { + /* non-stream reader gets as much steps as available now */ + InitBuffer(timeoutInstant, pollSeconds / 10, timeoutSeconds); + } +} + +bool BP5Reader::SleepOrQuit(const TimePoint &timeoutInstant, + const Seconds &pollSeconds) +{ + auto now = std::chrono::steady_clock::now(); + if (now + pollSeconds >= timeoutInstant) + { + return false; + } + auto remainderTime = timeoutInstant - now; + auto sleepTime = pollSeconds; + if (remainderTime < sleepTime) + { + sleepTime = remainderTime; + } + std::this_thread::sleep_for(sleepTime); + return true; +} + +size_t BP5Reader::OpenWithTimeout(transportman::TransportMan &tm, + const std::vector &fileNames, + const TimePoint &timeoutInstant, + const Seconds &pollSeconds, + std::string &lasterrmsg /*INOUT*/) +{ + size_t flag = 1; // 0 = OK, opened file, 1 = timeout, 2 = error + do + { + try + { + errno = 0; + const bool profile = + false; // m_BP4Deserializer.m_Profiler.m_IsActive; + 
tm.OpenFiles(fileNames, adios2::Mode::Read, + m_IO.m_TransportsParameters, profile); + flag = 0; // found file + break; + } + catch (std::ios_base::failure &e) + { + lasterrmsg = + std::string("errno=" + std::to_string(errno) + ": " + e.what()); + if (errno == ENOENT) + { + flag = 1; // timeout + } + else + { + flag = 2; // fatal error + break; + } + } + } while (SleepOrQuit(timeoutInstant, pollSeconds)); + return flag; +} + +void BP5Reader::OpenFiles(TimePoint &timeoutInstant, const Seconds &pollSeconds, + const Seconds &timeoutSeconds) +{ + /* Poll */ + size_t flag = 1; // 0 = OK, opened file, 1 = timeout, 2 = error + std::string lasterrmsg; + if (m_Comm.Rank() == 0) + { + /* Open the metadata index table */ + const std::string metadataIndexFile(GetBPMetadataIndexFileName(m_Name)); + + flag = OpenWithTimeout(m_MDIndexFileManager, {metadataIndexFile}, + timeoutInstant, pollSeconds, lasterrmsg); + if (flag == 0) + { + /* Open the metadata file */ + const std::string metadataFile(GetBPMetadataFileName(m_Name)); + + /* We found md.idx. If we don't find md.0 immediately we should + * wait a little bit hoping for the file system to catch up. + * This slows down finding the error in file reading mode but + * it will be more robust in streaming mode + */ + if (timeoutSeconds == Seconds(0.0)) + { + timeoutInstant += Seconds(5.0); + } + + flag = OpenWithTimeout(m_MDFileManager, {metadataFile}, + timeoutInstant, pollSeconds, lasterrmsg); + if (flag != 0) + { + /* Close the metadata index table */ + m_MDIndexFileManager.CloseFiles(); + } + else + { + /* Open the metametadata file */ + const std::string metametadataFile( + GetBPMetaMetadataFileName(m_Name)); + + /* We found md.idx and md.0. If we don't find mmd.0 immediately we should + * wait a little bit hoping for the file system to catch up. 
+ * This slows down finding the error in file reading mode but + * it will be more robust in streaming mode + */ + if (timeoutSeconds == Seconds(0.0)) + { + timeoutInstant += Seconds(5.0); + } + + flag = OpenWithTimeout(m_FileMetaMetadataManager, + {metametadataFile}, timeoutInstant, + pollSeconds, lasterrmsg); + if (flag != 0) + { + /* Close the metadata index table and the metadata file */ + m_MDIndexFileManager.CloseFiles(); + m_MDFileManager.CloseFiles(); + } + } + } + } + + flag = m_Comm.BroadcastValue(flag, 0); + if (flag == 2) + { + if (m_Comm.Rank() == 0 && !lasterrmsg.empty()) + { + throw std::ios_base::failure("ERROR: File " + m_Name + + " cannot be opened: " + lasterrmsg); + } + else + { + throw std::ios_base::failure("File " + m_Name + + " cannot be opened"); + } + } + else if (flag == 1) + { + if (m_Comm.Rank() == 0) + { + throw std::ios_base::failure( + "ERROR: File " + m_Name + " could not be found within the " + + std::to_string(timeoutSeconds.count()) + + "s timeout: " + lasterrmsg); + } + else + { + throw std::ios_base::failure( + "ERROR: File " + m_Name + " could not be found within the " + + std::to_string(timeoutSeconds.count()) + "s timeout"); + } + } + + /* At this point we may have an empty index table. + * The writer has created the file but no content may have been stored yet. 
+ */ +} + +void BP5Reader::InitTransports() +{ + if (m_IO.m_TransportsParameters.empty()) + { + Params defaultTransportParameters; + defaultTransportParameters["transport"] = "File"; + m_IO.m_TransportsParameters.push_back(defaultTransportParameters); + } +} + +uint64_t BP5Reader::MetadataExpectedMinFileSize(const std::string &IdxFileName, + bool hasHeader) +{ + size_t cur_idxsize = m_MetadataIndex.m_Buffer.size(); + static constexpr size_t m_MinIndexRecordSize = 3 * sizeof(uint64_t); + std::cout << " metadata expected min file size Cur = " << cur_idxsize + << " has header " << hasHeader << std::endl; + if ((hasHeader && cur_idxsize < m_IndexHeaderSize + m_MinIndexRecordSize) || + cur_idxsize < m_MinIndexRecordSize) + { + // no (new) step entry in the index, so no metadata is expected + return 0; + } + uint64_t lastpos = + *(uint64_t *)&(m_MetadataIndex.m_Buffer[cur_idxsize - 24]); + std::cout << " metadata expected min file size returning lastpos = " + << lastpos << std::endl; + return lastpos; +} + +void BP5Reader::InstallMetaMetaData(format::BufferSTL buffer) +{ + size_t Position = 0; + while (Position < buffer.m_Buffer.size()) + { + format::BP5Base::MetaMetaInfoBlock MMI; + MMI.MetaMetaIDLen = helper::ReadValue( + buffer.m_Buffer, Position, m_Minifooter.IsLittleEndian); + MMI.MetaMetaInfoLen = helper::ReadValue( + buffer.m_Buffer, Position, m_Minifooter.IsLittleEndian); + MMI.MetaMetaID = buffer.Data() + Position; + MMI.MetaMetaInfo = buffer.Data() + Position + MMI.MetaMetaIDLen; + m_BP5Deserializer->InstallMetaMetaData(MMI); + Position += MMI.MetaMetaIDLen + MMI.MetaMetaInfoLen; + } +} + +void BP5Reader::InitBuffer(const TimePoint &timeoutInstant, + const Seconds &pollSeconds, + const Seconds &timeoutSeconds) +{ + size_t newIdxSize = 0; + // Put all metadata in buffer + if (m_Comm.Rank() == 0) + { + /* Read metadata index table into memory */ + const size_t metadataIndexFileSize = + m_MDIndexFileManager.GetFileSize(0); + if (metadataIndexFileSize > 0) + { + 
m_MetadataIndex.Resize(metadataIndexFileSize, + "allocating metadata index buffer, " + "in call to BPFileReader Open"); + m_MDIndexFileManager.ReadFile(m_MetadataIndex.m_Buffer.data(), + metadataIndexFileSize); + + /* Read metametadata into memory */ + const size_t metametadataFileSize = + m_FileMetaMetadataManager.GetFileSize(0); + m_MetaMetadata.Resize(metametadataFileSize, + "allocating metadata index buffer, " + "in call to BPFileReader Open"); + m_FileMetaMetadataManager.ReadFile(m_MetaMetadata.m_Buffer.data(), + metametadataFileSize); + + size_t fileSize = 0; + fileSize = m_MDFileManager.GetFileSize(0); +#ifdef NOTDEF + /* Read metadata file into memory but first make sure + * it has the content that the index table refers to */ + uint64_t expectedMinFileSize = + MetadataExpectedMinFileSize(m_Name, true); + do + { + fileSize = m_MDFileManager.GetFileSize(0); + if (fileSize >= expectedMinFileSize) + { + break; + } + } while (SleepOrQuit(timeoutInstant, pollSeconds)); + + if (fileSize >= expectedMinFileSize) + { +#endif + m_Metadata.Resize( + fileSize, + "allocating metadata buffer, in call to BP5Reader Open"); + + m_MDFileManager.ReadFile(m_Metadata.m_Buffer.data(), fileSize); + m_MDFileAlreadyReadSize = fileSize; + m_MDIndexFileAlreadyReadSize = metadataIndexFileSize; + newIdxSize = metadataIndexFileSize; +#ifdef NOTDEF + } + else + { + throw std::ios_base::failure( + "ERROR: File " + m_Name + + " was found with an index file but md.0 " + "has not contained enough data within " + "the specified timeout of " + + std::to_string(timeoutSeconds.count()) + + " seconds. index size = " + + std::to_string(metadataIndexFileSize) + + " metadata size = " + std::to_string(fileSize) + + " expected size = " + std::to_string(expectedMinFileSize) + + ". 
One reason could be if the reader finds old data while " + "the writer is creating the new files."); + } +#endif + } + } + + newIdxSize = m_Comm.BroadcastValue(newIdxSize, 0); + + if (newIdxSize > 0) + { + // broadcast buffer to all ranks from zero + m_Comm.BroadcastVector(m_Metadata.m_Buffer); + + // broadcast metadata index buffer to all ranks from zero + m_Comm.BroadcastVector(m_MetadataIndex.m_Buffer); + + // broadcast metametadata buffer to all ranks from zero + m_Comm.BroadcastVector(m_MetaMetadata.m_Buffer); + + /* Parse metadata index table */ + ParseMetadataIndex(m_MetadataIndex, 0, true, false); + // now we are sure the index header has been parsed, first step parsing + // done + + std::cout << "Reader row major " << m_ReaderIsRowMajor << std::endl; + std::cout << "Writer row major " << m_WriterIsRowMajor << std::endl; + m_BP5Deserializer = new format::BP5Deserializer( + m_WriterCount, m_WriterIsRowMajor, m_ReaderIsRowMajor); + m_BP5Deserializer->m_Engine = this; + + InstallMetaMetaData(m_MetaMetadata); + + m_IdxHeaderParsed = true; + + // fills IO with Variables and Attributes + // m_MDFileProcessedSize = ParseMetadata( + // m_Metadata, *this, true); + + /* m_MDFileProcessedSize is the position in the buffer where processing + * ends. The processing is controlled by the number of records in the + * Index, which may be less than the actual entries in the metadata in a + * streaming situation (where writer has just written metadata for step + * K+1,...,K+L while the index contains K steps when the reader looks at + * it). + * + * In ProcessMetadataForNewSteps(), we will re-read the metadata which + * is in the buffer but has not been processed yet. 
+ */ + } +} + +void BP5Reader::ParseMetadataIndex(format::BufferSTL &bufferSTL, + const size_t absoluteStartPos, + const bool hasHeader, const bool oneStepOnly) +{ + const auto &buffer = bufferSTL.m_Buffer; + size_t &position = bufferSTL.m_Position; + + if (hasHeader) + { + // Read header (64 bytes) + // long version string + position = m_VersionTagPosition; + m_Minifooter.VersionTag.assign(&buffer[position], m_VersionTagLength); + + position = m_EndianFlagPosition; + const uint8_t endianness = helper::ReadValue(buffer, position); + m_Minifooter.IsLittleEndian = (endianness == 0) ? true : false; +#ifndef ADIOS2_HAVE_ENDIAN_REVERSE + if (helper::IsLittleEndian() != m_Minifooter.IsLittleEndian) + { + throw std::runtime_error( + "ERROR: reader found BigEndian bp file, " + "this version of ADIOS2 wasn't compiled " + "with the cmake flag -DADIOS2_USE_Endian_Reverse=ON " + "explicitly, in call to Open\n"); + } +#endif + + // This has no flag in BP5 header. Always true + m_Minifooter.HasSubFiles = true; + + // BP version + position = m_BPVersionPosition; + m_Minifooter.Version = helper::ReadValue( + buffer, position, m_Minifooter.IsLittleEndian); + if (m_Minifooter.Version != 5) + { + throw std::runtime_error( + "ERROR: ADIOS2 BP5 Engine only supports bp format " + "version 5, found " + + std::to_string(m_Minifooter.Version) + " version \n"); + } + + // Writer active flag + position = m_ActiveFlagPosition; + const char activeChar = helper::ReadValue( + buffer, position, m_Minifooter.IsLittleEndian); + m_WriterIsActive = (activeChar == '\1' ? 
true : false); + position = m_WriterCountPosition; + m_WriterCount = helper::ReadValue( + buffer, position, m_Minifooter.IsLittleEndian); + position = m_AggregatorCountPosition; + m_AggregatorCount = helper::ReadValue( + buffer, position, m_Minifooter.IsLittleEndian); + position = m_ColumnMajorFlagPosition; + const uint8_t val = helper::ReadValue( + buffer, position, m_Minifooter.IsLittleEndian); + std::cout << "Row major char is '" << val << "'" << std::endl; + m_WriterIsRowMajor = val == 'n'; + // move position to first row + position = 64; + } + std::cout << "Mini foot vers version " << (int)m_Minifooter.Version + << std::endl; + + for (uint64_t i = 0; i < m_WriterCount; i++) + { + m_WriterToFileMap.push_back(helper::ReadValue( + buffer, position, m_Minifooter.IsLittleEndian)); + std::cout << "Writer " << i << " wrote to file " << m_WriterToFileMap[i] + << std::endl; + } + + // Read each record now + uint64_t currentStep = 0; + do + { + std::vector ptrs; + std::cout << "Start Timestep position " << position << std::endl; + const uint64_t MetadataPos = helper::ReadValue( + buffer, position, m_Minifooter.IsLittleEndian); + const uint64_t MetadataSize = helper::ReadValue( + buffer, position, m_Minifooter.IsLittleEndian); + + ptrs.push_back(MetadataPos); + ptrs.push_back(MetadataSize); + ptrs.push_back(position); + m_MetadataIndexTable[currentStep] = ptrs; + std::cout << "Timestep " << currentStep << " has MetadataStart " + << ptrs[0] << " MetadataSize " << ptrs[1] << std::endl; + for (uint64_t i = 0; i < m_WriterCount; i++) + { + size_t DataPosPos = ptrs[2] + sizeof(uint64_t) * i; + const uint64_t DataPos = helper::ReadValue( + buffer, DataPosPos, m_Minifooter.IsLittleEndian); + std::cout << "Writer " << i << " data starts at " << DataPos + << std::endl; + } + + position += sizeof(uint64_t) * m_WriterCount; + m_StepsCount++; + currentStep++; + } while (!oneStepOnly && position < buffer.size()); +} + +#define declare_type(T) \ + void BP5Reader::DoGetSync(Variable 
&variable, T *data) \ + { \ + TAU_SCOPED_TIMER("BP5Reader::Get"); \ + GetSyncCommon(variable, data); \ + } \ + void BP5Reader::DoGetDeferred(Variable &variable, T *data) \ + { \ + TAU_SCOPED_TIMER("BP5Reader::Get"); \ + GetDeferredCommon(variable, data); \ + } +ADIOS2_FOREACH_STDTYPE_1ARG(declare_type) +#undef declare_type + +void BP5Reader::DoClose(const int transportIndex) +{ + TAU_SCOPED_TIMER("BP5Reader::Close"); + m_DataFileManager.CloseFiles(); + m_MDFileManager.CloseFiles(); +} + +#define declare_type(T) \ + std::vector::BPInfo> BP5Reader::DoBlocksInfo( \ + const Variable &variable, const size_t step) const \ + { \ + TAU_SCOPED_TIMER("BP5Reader::BlocksInfo"); \ + return m_BP5Deserializer->BlocksInfo(variable, step); \ + } + +ADIOS2_FOREACH_STDTYPE_1ARG(declare_type) +#undef declare_type + +size_t BP5Reader::DoSteps() const { return m_StepsCount; } + +} // end namespace engine +} // end namespace core +} // end namespace adios2 diff --git a/source/adios2/engine/bp5/BP5Reader.h b/source/adios2/engine/bp5/BP5Reader.h new file mode 100644 index 0000000000..916b6e11c4 --- /dev/null +++ b/source/adios2/engine/bp5/BP5Reader.h @@ -0,0 +1,209 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. 
+ * + * BP5Reader.h + * + * Created on: Aug 1, 2018 + * Author: Lipeng Wan wanl@ornl.gov + */ + +#ifndef ADIOS2_ENGINE_BP5_BP5READER_H_ +#define ADIOS2_ENGINE_BP5_BP5READER_H_ + +#include "adios2/common/ADIOSConfig.h" +#include "adios2/core/Engine.h" +#include "adios2/engine/bp5/BP5Engine.h" +#include "adios2/helper/adiosComm.h" +#include "adios2/toolkit/format/bp5/BP5Deserializer.h" +#include "adios2/toolkit/transportman/TransportMan.h" + +#include + +namespace adios2 +{ +namespace core +{ +namespace engine +{ + +class BP5Reader : public BP5Engine, public Engine +{ + +public: + /** + * Unique constructor + * @param io + * @param name + * @param openMode only read + * @param comm + */ + BP5Reader(IO &io, const std::string &name, const Mode mode, + helper::Comm comm); + + virtual ~BP5Reader() = default; + + StepStatus BeginStep(StepMode mode = StepMode::Read, + const float timeoutSeconds = -1.0) final; + + size_t CurrentStep() const final; + + void EndStep() final; + + void PerformGets() final; + +private: + typedef std::chrono::duration Seconds; + typedef std::chrono::time_point< + std::chrono::steady_clock, + std::chrono::duration> + TimePoint; + + format::BP5Deserializer *m_BP5Deserializer = nullptr; + /* transport manager for metadata file */ + transportman::TransportMan m_MDFileManager; + /* How many bytes of metadata have we already read in? */ + size_t m_MDFileAlreadyReadSize = 0; + /* How many bytes of metadata have we already processed? + * It is <= m_MDFileAlreadyReadSize, at = we need to read more */ + size_t m_MDFileProcessedSize = 0; + /* The file position of the first byte that is currently + * residing in memory. Needed for skewing positions when + * processing metadata index. 
+ */ + size_t m_MDFileAbsolutePos = 0; + /* m_MDFileAbsolutePos <= m_MDFileProcessedSize <= m_MDFileAlreadyReadSize + */ + + /* transport manager for managing data file(s) */ + transportman::TransportMan m_DataFileManager; + + /* transport manager for managing the metadata index file */ + transportman::TransportMan m_MDIndexFileManager; + /* transport manager for managing the metametadata file */ + transportman::TransportMan m_FileMetaMetadataManager; + /* How many bytes of metadata index have we already read in? */ + size_t m_MDIndexFileAlreadyReadSize = 0; + + /* transport manager for managing the active flag file */ + transportman::TransportMan m_ActiveFlagFileManager; + bool m_WriterIsActive = true; + + /** used for per-step reads, TODO: to be moved to BP5Deserializer */ + size_t m_CurrentStep = 0; + size_t m_StepsCount = 0; + bool m_FirstStep = true; + bool m_IdxHeaderParsed = false; // true after first index parsing + + Minifooter m_Minifooter; + + void Init(); + void InitTransports(); + + /* Sleep up to pollSeconds time if we have not reached timeoutInstant. + * Return true if slept + * return false if sleep was not needed because it was overtime + */ + bool SleepOrQuit(const TimePoint &timeoutInstant, + const Seconds &pollSeconds); + /** Open one category of files within timeout. + * @return: 0 = OK, 1 = timeout, 2 = error + * lasterrmsg contains the error message in case of error + */ + size_t OpenWithTimeout(transportman::TransportMan &tm, + const std::vector &fileNames, + const TimePoint &timeoutInstant, + const Seconds &pollSeconds, + std::string &lasterrmsg /*INOUT*/); + + /** Open files within timeout. + * @return True if files are opened, False in case of timeout + */ + void OpenFiles(TimePoint &timeoutInstant, const Seconds &pollSeconds, + const Seconds &timeoutSeconds); + void InitBuffer(const TimePoint &timeoutInstant, const Seconds &pollSeconds, + const Seconds &timeoutSeconds); + + /** Read in more metadata if exist (throwing away old).
+ * For streaming only. + * @return size of new content from Index Table + */ + size_t UpdateBuffer(const TimePoint &timeoutInstant, + const Seconds &pollSeconds); + + void ParseMetadataIndex(format::BufferSTL &bufferSTL, + const size_t absoluteStartPos, const bool hasHeader, + const bool oneStepOnly); + /** Process the new metadata coming in (in UpdateBuffer) + * @param newIdxSize: the size of the new content from Index Table + */ + void ProcessMetadataForNewSteps(const size_t newIdxSize); + + /** Check the active status of the writer. + * @return true if writer is still active. + * It sets m_WriterIsActive. + */ + bool CheckWriterActive(); + + /** Check for a step that is already in memory but hasn't + * been processed yet. + * @return true: if new step has been found and processed, false otherwise + * Used by CheckForNewSteps() to get the next step from memory if there is + * one. + */ + bool ProcessNextStepInMemory(); + + /** Check for new steps within timeout and only if writer is active. + * @return the status flag + * Used by BeginStep() to get new steps from file when it reaches the + * end of steps in memory.
+ */ + StepStatus CheckForNewSteps(Seconds timeoutSeconds); + +#define declare_type(T) \ + void DoGetSync(Variable &, T *) final; \ + void DoGetDeferred(Variable &, T *) final; + ADIOS2_FOREACH_STDTYPE_1ARG(declare_type) +#undef declare_type + + void DoClose(const int transportIndex = -1) final; + + template + void GetSyncCommon(Variable &variable, T *data); + + template + void GetDeferredCommon(Variable &variable, T *data); + + template + void ReadVariableBlocks(Variable &variable); + +#define declare_type(T) \ + std::vector::BPInfo> DoBlocksInfo( \ + const Variable &variable, const size_t step) const final; + + ADIOS2_FOREACH_STDTYPE_1ARG(declare_type) +#undef declare_type + + size_t DoSteps() const final; + uint32_t m_WriterCount = 0; + uint32_t m_AggregatorCount = 0; + uint32_t m_WriterColumnMajor = 0; + bool m_ReaderIsRowMajor = true; + bool m_WriterIsRowMajor = true; + std::vector m_WriterToFileMap; + format::BufferSTL m_MetadataIndex; + format::BufferSTL m_MetaMetadata; + format::BufferSTL m_Metadata; + uint64_t MetadataExpectedMinFileSize(const std::string &IdxFileName, + bool hasHeader); + void InstallMetaMetaData(format::BufferSTL MetaMetadata); + void ReadData(const size_t WriterRank, const size_t Timestep, + const size_t StartOffset, const size_t Length, + char *Destination); +}; + +} // end namespace engine +} // end namespace core +} // end namespace adios2 + +#endif /* ADIOS2_ENGINE_BP5_BP5READER_H_ */ diff --git a/source/adios2/engine/bp5/BP5Reader.tcc b/source/adios2/engine/bp5/BP5Reader.tcc new file mode 100644 index 0000000000..0f1ef5e436 --- /dev/null +++ b/source/adios2/engine/bp5/BP5Reader.tcc @@ -0,0 +1,43 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. 
+ * + * BP5Reader.tcc + * + * Created on: Aug 1, 2018 + * Author: Lipeng Wan wanl@ornl.gov + */ + +#ifndef ADIOS2_ENGINE_BP5_BP5READER_TCC_ +#define ADIOS2_ENGINE_BP5_BP5READER_TCC_ + +#include "BP5Reader.h" + +#include "adios2/helper/adiosFunctions.h" + +namespace adios2 +{ +namespace core +{ +namespace engine +{ + +template +inline void BP5Reader::GetSyncCommon(Variable &variable, T *data) +{ + bool need_sync = m_BP5Deserializer->QueueGet(variable, data); + if (need_sync) + PerformGets(); +} + +template +void BP5Reader::GetDeferredCommon(Variable &variable, T *data) +{ + (void)m_BP5Deserializer->QueueGet(variable, data); +} + +} // end namespace engine +} // end namespace core +} // end namespace adios2 + +#endif /* ADIOS2_ENGINE_BP5_BP5READER_TCC_ */ diff --git a/source/adios2/engine/bp5/BP5Writer.cpp b/source/adios2/engine/bp5/BP5Writer.cpp new file mode 100644 index 0000000000..2f1abe3884 --- /dev/null +++ b/source/adios2/engine/bp5/BP5Writer.cpp @@ -0,0 +1,566 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. 
+ * + * BP5Writer.cpp + * + */ + +#include "BP5Writer.h" +#include "BP5Writer.tcc" + +#include "adios2/common/ADIOSMacros.h" +#include "adios2/core/IO.h" +#include "adios2/helper/adiosFunctions.h" //CheckIndexRange +#include "adios2/toolkit/profiling/taustubs/tautimer.hpp" +#include "adios2/toolkit/transport/file/FileFStream.h" + +#include +#include + +namespace adios2 +{ +namespace core +{ +namespace engine +{ + +BP5Writer::BP5Writer(IO &io, const std::string &name, const Mode mode, + helper::Comm comm) +: Engine("BP5Writer", io, name, mode, std::move(comm)), m_BP5Serializer(), + m_FileDataManager(m_Comm), m_FileMetadataManager(m_Comm), + m_FileMetaMetadataManager(m_Comm), m_FileMetadataIndexManager(m_Comm) +{ + TAU_SCOPED_TIMER("BP5Writer::Open"); + m_IO.m_ReadStreaming = false; + m_EndMessage = " in call to IO Open BP5Writer " + m_Name + "\n"; + + Init(); +} + +StepStatus BP5Writer::BeginStep(StepMode mode, const float timeoutSeconds) +{ + m_WriterStep++; + return StepStatus::OK; +} + +size_t BP5Writer::CurrentStep() const { return m_WriterStep; } + +void BP5Writer::PerformPuts() +{ + TAU_SCOPED_TIMER("BP5Writer::PerformPuts"); + return; +} + +void BP5Writer::WriteMetaMetadata( + const std::vector MetaMetaBlocks) +{ + for (auto &b : MetaMetaBlocks) + { + m_FileMetaMetadataManager.WriteFiles((char *)&b.MetaMetaIDLen, + sizeof(size_t)); + m_FileMetaMetadataManager.WriteFiles((char *)&b.MetaMetaInfoLen, + sizeof(size_t)); + m_FileMetaMetadataManager.WriteFiles((char *)b.MetaMetaID, + b.MetaMetaIDLen); + m_FileMetaMetadataManager.WriteFiles((char *)b.MetaMetaInfo, + b.MetaMetaInfoLen); + } +} + +uint64_t BP5Writer::WriteMetadata(const std::vector MetaDataBlocks) +{ + uint64_t MDataTotalSize = 0; + uint64_t MetaDataSize = 0; + std::vector SizeVector; + SizeVector.reserve(MetaDataBlocks.size()); + for (auto &b : MetaDataBlocks) + { + MDataTotalSize += sizeof(uint64_t) + b.iov_len; + SizeVector.push_back(b.iov_len); + } + MetaDataSize = 0; + 
m_FileMetadataManager.WriteFiles((char *)&MDataTotalSize, sizeof(uint64_t)); + MetaDataSize += sizeof(uint64_t); + m_FileMetadataManager.WriteFiles((char *)SizeVector.data(), + sizeof(uint64_t) * SizeVector.size()); + MetaDataSize += sizeof(uint64_t) * SizeVector.size(); + for (auto &b : MetaDataBlocks) + { + if (!b.iov_base) + continue; + m_FileMetadataManager.WriteFiles((char *)b.iov_base, b.iov_len); + MetaDataSize += b.iov_len; + } + + m_MetaDataPos += MetaDataSize; + return MetaDataSize; +} + +void BP5Writer::WriteData(format::BufferV *Data) +{ + format::BufferV::BufferV_iovec DataVec = Data->DataVec(); + size_t DataSize = 0; + int i = 0; + while (DataVec[i].iov_base != NULL) + { + m_FileDataManager.WriteFiles((char *)DataVec[i].iov_base, + DataVec[i].iov_len); + DataSize += DataVec[i].iov_len; + i++; + } + std::cout << "before update m_DataPos is " << m_DataPos << std::endl; + m_DataPos += DataSize; +} + +void BP5Writer::WriteMetadataFileIndex(uint64_t MetaDataPos, + uint64_t MetaDataSize, + std::vector DataSizes) +{ + + m_FileMetadataManager.FlushFiles(); + + uint64_t buf[2]; + buf[0] = MetaDataPos; + buf[1] = MetaDataSize; + m_FileMetadataIndexManager.WriteFiles((char *)buf, sizeof(buf)); + for (int i = 0; i < DataSizes.size(); i++) + { + std::cout << "Writer data pos rank " << i << " = " << m_WriterDataPos[i] + << std::endl; + } + m_FileMetadataIndexManager.WriteFiles((char *)m_WriterDataPos.data(), + DataSizes.size() * sizeof(uint64_t)); + for (int i = 0; i < DataSizes.size(); i++) + { + m_WriterDataPos[i] += DataSizes[i]; + } +} + +void BP5Writer::EndStep() +{ + TAU_SCOPED_TIMER("BP5Writer::EndStep"); + + // true: advances step + auto TSInfo = m_BP5Serializer.CloseTimestep(m_WriterStep); + + /* TSInfo includes NewMetaMetaBlocks, the MetaEncodeBuffer, the + * AttributeEncodeBuffer and the data encode Vector */ + /* the first */ + + std::cout << "Endstp, data buffer size = " << TSInfo.DataBuffer->Size() + << std::endl; + std::vector MetaBuffer = 
m_BP5Serializer.CopyMetadataToContiguous( + TSInfo.NewMetaMetaBlocks, TSInfo.MetaEncodeBuffer, + TSInfo.DataBuffer->Size()); + + size_t LocalSize = MetaBuffer.size(); + std::vector RecvCounts = m_Comm.GatherValues(LocalSize, 0); + + std::vector *RecvBuffer = new std::vector; + if (m_Comm.Rank() == 0) + { + uint64_t TotalSize = 0; + for (auto &n : RecvCounts) + TotalSize += n; + RecvBuffer->resize(TotalSize); + } + m_Comm.GathervArrays(MetaBuffer.data(), LocalSize, RecvCounts.data(), + RecvCounts.size(), RecvBuffer->data(), 0); + + if (m_Comm.Rank() == 0) + { + std::vector UniqueMetaMetaBlocks; + std::vector DataSizes; + auto Metadata = m_BP5Serializer.BreakoutContiguousMetadata( + RecvBuffer, RecvCounts, UniqueMetaMetaBlocks, DataSizes); + std::cout << "Data sizes size " << DataSizes.size() << std::endl; + WriteMetaMetadata(UniqueMetaMetaBlocks); + uint64_t ThisMetaDataPos = m_MetaDataPos; + uint64_t ThisMetaDataSize = WriteMetadata(Metadata); + WriteMetadataFileIndex(ThisMetaDataPos, ThisMetaDataSize, DataSizes); + } + delete RecvBuffer; + WriteData(TSInfo.DataBuffer); +} + +// PRIVATE +void BP5Writer::Init() +{ + m_BP5Serializer.m_Engine = this; + m_RankMPI = m_Comm.Rank(); + InitParameters(); + InitTransports(); + InitBPBuffer(); +} + +#define declare_type(T) \ + void BP5Writer::DoPutSync(Variable &variable, const T *data) \ + { \ + PutCommon(variable, data, true); \ + } \ + void BP5Writer::DoPutDeferred(Variable &variable, const T *data) \ + { \ + PutCommon(variable, data, false); \ + } + +ADIOS2_FOREACH_STDTYPE_1ARG(declare_type) +#undef declare_type + +void BP5Writer::InitParameters() +{ + ParseParams(m_IO, m_Parameters); + m_WriteToBB = false; // !(m_Parameters.BurstBufferPath.empty()); + m_DrainBB = m_WriteToBB && m_Parameters.BurstBufferDrain; +} + +void BP5Writer::InitTransports() +{ + // TODO need to add support for aggregators here later + if (m_IO.m_TransportsParameters.empty()) + { + Params defaultTransportParameters; + 
defaultTransportParameters["transport"] = "File"; + m_IO.m_TransportsParameters.push_back(defaultTransportParameters); + } + + // only consumers will interact with transport managers + m_BBName = m_Name; + if (m_WriteToBB) + { + m_BBName = m_Parameters.BurstBufferPath + PathSeparator + m_Name; + } + + if (m_Aggregator.m_IsConsumer) + { + // Names passed to IO AddTransport option with key "Name" + const std::vector transportsNames = + m_FileDataManager.GetFilesBaseNames(m_BBName, + m_IO.m_TransportsParameters); + + // /path/name.bp.dir/name.bp.rank + m_SubStreamNames = GetBPSubStreamNames(transportsNames); + if (m_DrainBB) + { + const std::vector drainTransportNames = + m_FileDataManager.GetFilesBaseNames( + m_Name, m_IO.m_TransportsParameters); + m_DrainSubStreamNames = GetBPSubStreamNames(drainTransportNames); + /* start up BB thread */ + // m_FileDrainer.SetVerbose( + // m_Parameters.BurstBufferVerbose, + // m_Comm.Rank()); + m_FileDrainer.Start(); + } + } + + /* Create the directories either on target or burst buffer if used */ + // m_BP4Serializer.m_Profiler.Start("mkdir"); + + if (m_Comm.Rank() == 0) + { + const std::vector transportsNames = + m_FileMetadataManager.GetFilesBaseNames( + m_Name, m_IO.m_TransportsParameters); + + m_MetadataFileNames = GetBPMetadataFileNames(transportsNames); + m_MetaMetadataFileNames = GetBPMetaMetadataFileNames(transportsNames); + m_MetadataIndexFileNames = GetBPMetadataIndexFileNames(transportsNames); + } + m_FileMetadataManager.MkDirsBarrier(m_MetadataFileNames, + m_IO.m_TransportsParameters, + m_Parameters.NodeLocal || m_WriteToBB); + if (m_DrainBB) + { + /* Create the directories on target anyway by main thread */ + m_FileDataManager.MkDirsBarrier(m_DrainSubStreamNames, + m_IO.m_TransportsParameters, + m_Parameters.NodeLocal); + } + + if (m_Aggregator.m_IsConsumer) + { +#ifdef NOTDEF + if (m_Parameters.AsyncTasks) + { + for (size_t i = 0; i < m_IO.m_TransportsParameters.size(); ++i) + { + 
m_IO.m_TransportsParameters[i]["asynctasks"] = "true"; + } + } +#endif + m_FileDataManager.OpenFiles(m_SubStreamNames, m_OpenMode, + m_IO.m_TransportsParameters, false); + + if (m_DrainBB) + { + for (const auto &name : m_DrainSubStreamNames) + { + m_FileDrainer.AddOperationOpen(name, m_OpenMode); + } + } + } + + if (m_Comm.Rank() == 0) + { + m_FileMetaMetadataManager.OpenFiles(m_MetaMetadataFileNames, m_OpenMode, + m_IO.m_TransportsParameters, false); + + m_FileMetadataManager.OpenFiles(m_MetadataFileNames, m_OpenMode, + m_IO.m_TransportsParameters, false); + + uint64_t WriterCount = m_Comm.Size(); + m_FileMetadataIndexManager.OpenFiles( + m_MetadataIndexFileNames, m_OpenMode, m_IO.m_TransportsParameters, + false); + + if (m_DrainBB) + { + const std::vector drainTransportNames = + m_FileDataManager.GetFilesBaseNames( + m_Name, m_IO.m_TransportsParameters); + m_DrainMetadataFileNames = + GetBPMetadataFileNames(drainTransportNames); + m_DrainMetadataIndexFileNames = + GetBPMetadataIndexFileNames(drainTransportNames); + + for (const auto &name : m_DrainMetadataFileNames) + { + m_FileDrainer.AddOperationOpen(name, m_OpenMode); + } + for (const auto &name : m_DrainMetadataIndexFileNames) + { + m_FileDrainer.AddOperationOpen(name, m_OpenMode); + } + } + } +} + +/*generate the header for the metadata index file*/ +void BP5Writer::MakeHeader(format::BufferSTL &b, const std::string fileType, + const bool isActive) +{ + auto lf_CopyVersionChar = [](const std::string version, + std::vector &buffer, size_t &position) { + helper::CopyToBuffer(buffer, position, version.c_str()); + }; + + auto &buffer = b.m_Buffer; + auto &position = b.m_Position; + auto &absolutePosition = b.m_AbsolutePosition; + if (position > 0) + { + throw std::invalid_argument( + "ERROR: BP4Serializer::MakeHeader can only be called for an empty " + "buffer. 
This one for " + + fileType + " already has content of " + std::to_string(position) + + " bytes."); + } + + if (b.GetAvailableSize() < 64) + { + b.Resize(position + 64, "BP4Serializer::MakeHeader " + fileType); + } + + const std::string majorVersion(std::to_string(ADIOS2_VERSION_MAJOR)); + const std::string minorVersion(std::to_string(ADIOS2_VERSION_MINOR)); + const std::string patchVersion(std::to_string(ADIOS2_VERSION_PATCH)); + + // byte 0-31: Readable tag + if (position != m_VersionTagPosition) + { + throw std::runtime_error( + "ADIOS Coding ERROR in BP4Serializer::MakeHeader. Version Tag " + "position mismatch"); + } + std::string versionLongTag("ADIOS-BP v" + majorVersion + "." + + minorVersion + "." + patchVersion + " "); + size_t maxTypeLen = m_VersionTagLength - versionLongTag.size(); + const std::string fileTypeStr = fileType.substr(0, maxTypeLen); + versionLongTag += fileTypeStr; + const size_t versionLongTagSize = versionLongTag.size(); + if (versionLongTagSize < m_VersionTagLength) + { + helper::CopyToBuffer(buffer, position, versionLongTag.c_str(), + versionLongTagSize); + position += m_VersionTagLength - versionLongTagSize; + } + else if (versionLongTagSize > m_VersionTagLength) + { + helper::CopyToBuffer(buffer, position, versionLongTag.c_str(), + m_VersionTagLength); + } + else + { + helper::CopyToBuffer(buffer, position, versionLongTag.c_str(), + m_VersionTagLength); + } + + // byte 32-35: MAJOR MINOR PATCH Unused + + lf_CopyVersionChar(majorVersion, buffer, position); + lf_CopyVersionChar(minorVersion, buffer, position); + lf_CopyVersionChar(patchVersion, buffer, position); + ++position; + + // Note: Reader does process and use bytes 36-38 in + // BP4Deserialize.cpp::ParseMetadataIndex(). + // Order and position must match there. + + // byte 36: endianness + if (position != m_EndianFlagPosition) + { + throw std::runtime_error( + "ADIOS Coding ERROR in BP4Serializer::MakeHeader. 
Endian Flag " + "position mismatch"); + } + const uint8_t endianness = helper::IsLittleEndian() ? 0 : 1; + helper::CopyToBuffer(buffer, position, &endianness); + + // byte 37: BP Version 5 + if (position != m_BPVersionPosition) + { + throw std::runtime_error( + "ADIOS Coding ERROR in BP4Serializer::MakeHeader. Active Flag " + "position mismatch"); + } + const uint8_t version = 5; + helper::CopyToBuffer(buffer, position, &version); + + // byte 38: Active flag (used in Index Table only) + if (position != m_ActiveFlagPosition) + { + throw std::runtime_error( + "ADIOS Coding ERROR in BP4Serializer::MakeHeader. Active Flag " + "position mismatch"); + } + const uint8_t activeFlag = (isActive ? 1 : 0); + helper::CopyToBuffer(buffer, position, &activeFlag); + + // byte 39: Minor file version (NOTE(review): writes &version, not &subversion - confirm intent) + const uint8_t subversion = 0; + helper::CopyToBuffer(buffer, position, &version); + + // bytes 40-43 writer count + const uint32_t WriterCount = m_Comm.Size(); + helper::CopyToBuffer(buffer, position, &WriterCount); + // bytes 44-47 aggregator count + helper::CopyToBuffer(buffer, position, &WriterCount); + // byte 48 columnMajor + // write if data is column major in metadata and data + const uint8_t columnMajor = + (helper::IsRowMajor(m_IO.m_HostLanguage) == false) ? 'y' : 'n'; + helper::CopyToBuffer(buffer, position, &columnMajor); + + // byte 49-63: unused + position += 15; + absolutePosition = position; +} + +void BP5Writer::InitBPBuffer() +{ + /* This is a new file.
+ * Make headers in data buffer and metadata buffer (but do not write + * them yet so that Open() can stay free of writing to disk) + */ + if (m_Comm.Rank() == 0) + { + format::BufferSTL b; + MakeHeader(b, "Metadata", false); + m_FileMetadataManager.WriteFiles(b.m_Buffer.data(), b.m_Position); + m_MetaDataPos = b.m_Position; + format::BufferSTL bi; + MakeHeader(bi, "Index Table", true); + m_FileMetadataIndexManager.WriteFiles(bi.m_Buffer.data(), + bi.m_Position); + std::vector Assignment(m_Comm.Size()); + for (uint64_t i = 0; i < m_Comm.Size(); i++) + { + Assignment[i] = i; // Change when we do aggregation + } + // where each rank's data will end up + m_FileMetadataIndexManager.WriteFiles((char *)Assignment.data(), + sizeof(Assignment[0]) * + Assignment.size()); + } + if (m_Aggregator.m_IsConsumer) + { + format::BufferSTL d; + MakeHeader(d, "Data", false); + m_FileDataManager.WriteFiles(d.m_Buffer.data(), d.m_Position); + m_DataPos = d.m_Position; + m_WriterDataPos.resize(m_Comm.Size()); + for (auto &DataPos : m_WriterDataPos) + { + DataPos = m_DataPos; + } + } +} + +void BP5Writer::DoFlush(const bool isFinal, const int transportIndex) +{ + m_FileMetadataManager.FlushFiles(); + m_FileMetaMetadataManager.FlushFiles(); + m_FileDataManager.FlushFiles(); + // m_BP4Serializer.ResetBuffer(m_BP4Serializer.m_Data, false, false); + + // if (m_Parameters.CollectiveMetadata) + // { + // WriteCollectiveMetadataFile(); + // } + // if (m_BP4Serializer.m_Aggregator.m_IsActive) + // { + // AggregateWriteData(isFinal, transportIndex); + // } + // else + // { + // WriteData(isFinal, transportIndex); + // } +} + +void BP5Writer::DoClose(const int transportIndex) +{ + TAU_SCOPED_TIMER("BP5Writer::Close"); + PerformPuts(); + + DoFlush(true, transportIndex); + + m_FileDataManager.CloseFiles(transportIndex); + // Delete files from temporary storage if draining was on + + if (m_Comm.Rank() == 0) + { + // close metadata file + m_FileMetadataManager.CloseFiles(); + + // close metametadata 
file + m_FileMetaMetadataManager.CloseFiles(); + + // close metadata index file + m_FileMetadataIndexManager.CloseFiles(); + } +} + +/*write the content of metadata index file*/ +void BP5Writer::PopulateMetadataIndexFileContent( + format::BufferSTL &b, const uint64_t currentStep, const uint64_t mpirank, + const uint64_t pgIndexStart, const uint64_t variablesIndexStart, + const uint64_t attributesIndexStart, const uint64_t currentStepEndPos, + const uint64_t currentTimeStamp) +{ + TAU_SCOPED_TIMER("BP5Writer::PopulateMetadataIndexFileContent"); + auto &buffer = b.m_Buffer; + auto &position = b.m_Position; + helper::CopyToBuffer(buffer, position, &currentStep); + helper::CopyToBuffer(buffer, position, &mpirank); + helper::CopyToBuffer(buffer, position, &pgIndexStart); + helper::CopyToBuffer(buffer, position, &variablesIndexStart); + helper::CopyToBuffer(buffer, position, &attributesIndexStart); + helper::CopyToBuffer(buffer, position, &currentStepEndPos); + helper::CopyToBuffer(buffer, position, &currentTimeStamp); + position += 8; +} + +} // end namespace engine +} // end namespace core +} // end namespace adios2 diff --git a/source/adios2/engine/bp5/BP5Writer.h b/source/adios2/engine/bp5/BP5Writer.h new file mode 100644 index 0000000000..cfcec6c43f --- /dev/null +++ b/source/adios2/engine/bp5/BP5Writer.h @@ -0,0 +1,173 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details.
+ * + * BP5Writer.h + * + */ + +#ifndef ADIOS2_ENGINE_BP5_BP5WRITER_H_ +#define ADIOS2_ENGINE_BP5_BP5WRITER_H_ + +#include "adios2/common/ADIOSConfig.h" +#include "adios2/core/Engine.h" +#include "adios2/engine/bp5/BP5Engine.h" +#include "adios2/helper/adiosComm.h" +#include "adios2/toolkit/aggregator/mpi/MPIChain.h" +#include "adios2/toolkit/burstbuffer/FileDrainerSingleThread.h" +#include "adios2/toolkit/format/bp5/BP5Serializer.h" +#include "adios2/toolkit/transportman/TransportMan.h" + +namespace adios2 +{ +namespace core +{ +namespace engine +{ + +class BP5Writer : public BP5Engine, public core::Engine +{ + +public: + /** + * Constructor for file Writer in BP5 format + * @param name unique name given to the engine + * @param openMode w (supported), r, a from OpenMode in ADIOSTypes.h + * @param comm multi-process communicator + */ + BP5Writer(IO &io, const std::string &name, const Mode mode, + helper::Comm comm); + + ~BP5Writer() = default; + + StepStatus BeginStep(StepMode mode, + const float timeoutSeconds = -1.0) final; + size_t CurrentStep() const final; + void PerformPuts() final; + void EndStep() final; + +private: + /** Single object controlling BP buffering */ + format::BP5Serializer m_BP5Serializer; + + /** Manage BP data files Transports from IO AddTransport */ + transportman::TransportMan m_FileDataManager; + + /** Manages the optional collective metadata files */ + transportman::TransportMan m_FileMetadataManager; + + /* transport manager for managing the metadata index file */ + transportman::TransportMan m_FileMetadataIndexManager; + + transportman::TransportMan m_FileMetaMetadataManager; + + int64_t m_WriterStep = -1; + /* + * Burst buffer variables + */ + /** true if burst buffer is used to write */ + bool m_WriteToBB = false; + /** true if burst buffer is drained to disk */ + bool m_DrainBB = true; + /** File drainer thread if burst buffer is used */ + burstbuffer::FileDrainerSingleThread m_FileDrainer; + /** m_Name modified with burst buffer 
path if BB is used, + * == m_Name otherwise. + * m_Name is a constant of Engine and is the user provided target path + */ + std::string m_BBName; + /* Name of subfiles to directly write to (for all transports) + * This is either original target or burst buffer if used */ + std::vector m_SubStreamNames; + /* Name of subfiles on target if burst buffer is used (for all transports) + */ + std::vector m_DrainSubStreamNames; + std::vector m_MetadataFileNames; + std::vector m_DrainMetadataFileNames; + std::vector m_MetaMetadataFileNames; + std::vector m_MetadataIndexFileNames; + std::vector m_DrainMetadataIndexFileNames; + std::vector m_ActiveFlagFileNames; + + void Init() final; + + /** Parses parameters from IO SetParameters */ + void InitParameters() final; + /** Parses transports and parameters from IO AddTransport */ + void InitTransports() final; + /** Allocates memory and starts a PG group */ + void InitBPBuffer(); + +#define declare_type(T) \ + void DoPutSync(Variable &, const T *) final; \ + void DoPutDeferred(Variable &, const T *) final; + + ADIOS2_FOREACH_STDTYPE_1ARG(declare_type) +#undef declare_type + + template + void PutCommon(Variable &variable, const T *data, bool sync); + + void DoFlush(const bool isFinal = false, const int transportIndex = -1); + + void DoClose(const int transportIndex = -1) final; + + /** Write a profiling.json file from m_BP1Writer and m_TransportsManager + * profilers*/ + void WriteProfilingJSONFile(); + + void WriteMetaMetadata( + const std::vector MetaMetaBlocks); + + void WriteMetadataFileIndex(uint64_t MetaDataPos, uint64_t MetaDataSize, + std::vector DataSizes); + + uint64_t WriteMetadata(const std::vector MetaDataBlocks); + + void WriteData(format::BufferV *Data); + + void PopulateMetadataIndexFileContent( + format::BufferSTL &buffer, const uint64_t currentStep, + const uint64_t mpirank, const uint64_t pgIndexStart, + const uint64_t variablesIndexStart, const uint64_t attributesIndexStart, + const uint64_t currentStepEndPos, 
const uint64_t currentTimeStamp); + + void UpdateActiveFlag(const bool active); + + void WriteCollectiveMetadataFile(const bool isFinal = false); + + /** + * N-to-N data buffers writes, including metadata file + * @param transportIndex + */ + // void WriteData(const bool isFinal, const int transportIndex = -1); + + /** + * N-to-M (aggregation) data buffers writes, including metadata file + * @param transportIndex + */ + void AggregateWriteData(const bool isFinal, const int transportIndex = -1); + + template + T *BufferDataCommon(const size_t payloadOffset, + const size_t bufferID) noexcept; + + template + void PerformPutCommon(Variable &variable); + + /** manages all communication tasks in aggregation */ + aggregator::MPIChain m_Aggregator; + +private: + uint64_t m_MetaDataPos = 0; // updated during WriteMetaData + uint64_t m_DataPos = 0; // updated during WriteData + std::vector m_WriterDataPos; + void MakeHeader(format::BufferSTL &b, const std::string fileType, + const bool isActive); +}; + +} // end namespace engine +} // end namespace core +} // end namespace adios2 + +#endif /* ADIOS2_ENGINE_BP5_BP5WRITER_H_ */ diff --git a/source/adios2/engine/bp5/BP5Writer.tcc b/source/adios2/engine/bp5/BP5Writer.tcc new file mode 100644 index 0000000000..75785bfaa5 --- /dev/null +++ b/source/adios2/engine/bp5/BP5Writer.tcc @@ -0,0 +1,53 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. 
+ * + * BP5Writer.tcc implementation of template functions with known type + * + * Created on: Aug 1, 2018 + * Author: Lipeng Wan wanl@ornl.gov + */ +#ifndef ADIOS2_ENGINE_BP5_BP5WRITER_TCC_ +#define ADIOS2_ENGINE_BP5_BP5WRITER_TCC_ + +#include "BP5Writer.h" + +namespace adios2 +{ +namespace core +{ +namespace engine +{ + +template +void BP5Writer::PutCommon(Variable &variable, const T *values, bool sync) +{ + variable.SetData(values); + + size_t *Shape = NULL; + size_t *Start = NULL; + size_t *Count = NULL; + size_t DimCount = 0; + + if (variable.m_ShapeID == ShapeID::GlobalArray) + { + DimCount = variable.m_Shape.size(); + Shape = variable.m_Shape.data(); + Start = variable.m_Start.data(); + Count = variable.m_Count.data(); + } + else if (variable.m_ShapeID == ShapeID::LocalArray) + { + DimCount = variable.m_Count.size(); + Count = variable.m_Count.data(); + } + m_BP5Serializer.Marshal((void *)&variable, variable.m_Name.c_str(), + variable.m_Type, variable.m_ElementSize, DimCount, + Shape, Count, Start, values, sync); +} + +} // end namespace engine +} // end namespace core +} // end namespace adios2 + +#endif /* ADIOS2_ENGINE_BP5_BP5WRITER_TCC_ */ diff --git a/source/adios2/engine/sst/SstParamParser.cpp b/source/adios2/engine/sst/SstParamParser.cpp index e8055f4d11..f31938ccaa 100644 --- a/source/adios2/engine/sst/SstParamParser.cpp +++ b/source/adios2/engine/sst/SstParamParser.cpp @@ -137,6 +137,10 @@ void SstParamParser::ParseParams(IO &io, struct _SstParams &Params) { parameter = SstMarshalBP; } + else if (method == "bp5") + { + parameter = SstMarshalBP5; + } else { throw std::invalid_argument( diff --git a/source/adios2/engine/sst/SstReader.cpp b/source/adios2/engine/sst/SstReader.cpp index 1aa964324b..d80512d482 100644 --- a/source/adios2/engine/sst/SstReader.cpp +++ b/source/adios2/engine/sst/SstReader.cpp @@ -47,8 +47,8 @@ SstReader::SstReader(IO &io, const std::string &name, const Mode mode, } // Maybe need other writer-side params in the future, but for 
now only - // marshal method. - SstReaderGetParams(m_Input, &m_WriterMarshalMethod); + // marshal method, and if the writer is row major. + SstReaderGetParams(m_Input, &m_WriterMarshalMethod, &m_WriterIsRowMajor); auto varFFSCallback = [](void *reader, const char *variableName, const int type, void *data) { @@ -273,7 +273,58 @@ StepStatus SstReader::BeginStep(StepMode Mode, const float timeout_sec) m_BetweenStepPairs = true; - if (m_WriterMarshalMethod == SstMarshalBP) + if (m_WriterMarshalMethod == SstMarshalBP5) + { + m_CurrentStepMetaData = SstGetCurMetadata(m_Input); + if (!m_BP5Deserializer) + { + m_BP5Deserializer = new format::BP5Deserializer( + m_CurrentStepMetaData->WriterCohortSize, m_WriterIsRowMajor, + Params.IsRowMajor); + m_BP5Deserializer->m_Engine = this; + } + SstMetaMetaList MMList = + SstGetNewMetaMetaData(m_Input, SstCurrentStep(m_Input)); + // m_BP5Deserializer->StepInit(m_IO.m_Parameters, + // "in call to BP5::BeginStep", "bp5"); + int i = 0; + while (MMList && MMList[i].BlockData) + { + format::BP5Base::MetaMetaInfoBlock MM; + MM.MetaMetaID = MMList[i].ID; + MM.MetaMetaIDLen = MMList[i].IDSize; + MM.MetaMetaInfo = MMList[i].BlockData; + MM.MetaMetaInfoLen = MMList[i].BlockSize; + m_BP5Deserializer->InstallMetaMetaData(MM); + i++; + } + free(MMList); + + SstBlock AttributeBlockList = + SstGetAttributeData(m_Input, SstCurrentStep(m_Input)); + i = 0; + while (AttributeBlockList && AttributeBlockList[i].BlockData) + { + m_IO.RemoveAllAttributes(); + m_BP5Deserializer->InstallAttributeData( + AttributeBlockList[i].BlockData, + AttributeBlockList[i].BlockSize); + i++; + } + + m_IO.RemoveAllVariables(); + m_BP5Deserializer->SetupForTimestep(SstCurrentStep(m_Input)); + + for (int i = 0; i < m_CurrentStepMetaData->WriterCohortSize; i++) + { + struct _SstData *tmp = m_CurrentStepMetaData->WriterMetadata[i]; + m_BP5Deserializer->InstallMetaData(tmp->block, tmp->DataSize, i); + } + + m_IO.ResetVariablesStepSelection(true, + "in call to SST Reader 
BeginStep"); + } + else if (m_WriterMarshalMethod == SstMarshalBP) { TAU_SCOPED_TIMER( "BP Marshaling Case - deserialize and install metadata"); @@ -372,7 +423,7 @@ void SstReader::EndStep() "ERROR: Writer failed before returning data"); } } - if (m_WriterMarshalMethod == SstMarshalBP) + else if (m_WriterMarshalMethod == SstMarshalBP) { PerformGets(); @@ -392,6 +443,11 @@ void SstReader::EndStep() // delete m_BP3Deserializer; } + if (m_WriterMarshalMethod == SstMarshalBP5) + { + + BP5PerformGets(); + } else { // unknown marshaling method, shouldn't happen @@ -451,7 +507,8 @@ void SstReader::Init() SstFFSPerformGets(m_Input); \ } \ } \ - if (m_WriterMarshalMethod == SstMarshalBP) \ + if ((m_WriterMarshalMethod == SstMarshalBP) || \ + (m_WriterMarshalMethod == SstMarshalBP5)) \ { \ /* DoGetSync() is going to have terrible performance 'cause */ \ /* it's a bad idea in an SST-like environment. But do */ \ @@ -522,16 +579,52 @@ void SstReader::Init() variable.m_Name); \ } \ } \ + if (m_WriterMarshalMethod == SstMarshalBP5) \ + { \ + bool need_sync = m_BP5Deserializer->QueueGet(variable, data); \ + } \ } ADIOS2_FOREACH_STDTYPE_1ARG(declare_gets) #undef declare_gets +void SstReader::BP5PerformGets() +{ + auto ReadRequests = m_BP5Deserializer->GenerateReadRequests(); + std::vector sstReadHandlers; + for (const auto &Req : ReadRequests) + { + void *dp_info = NULL; + if (m_CurrentStepMetaData->DP_TimestepInfo) + { + dp_info = m_CurrentStepMetaData->DP_TimestepInfo[Req.WriterRank]; + } + auto ret = SstReadRemoteMemory(m_Input, Req.WriterRank, Req.Timestep, + Req.StartOffset, Req.ReadLength, + Req.DestinationAddr, dp_info); + sstReadHandlers.push_back(ret); + } + for (const auto &i : sstReadHandlers) + { + if (SstWaitForCompletion(m_Input, i) != SstSuccess) + { + throw std::runtime_error( + "ERROR: Writer failed before returning data"); + } + } + + m_BP5Deserializer->FinalizeGets(ReadRequests); +} + void SstReader::PerformGets() { if (m_WriterMarshalMethod == SstMarshalFFS) { 
SstFFSPerformGets(m_Input); } + else if (m_WriterMarshalMethod == SstMarshalBP5) + { + BP5PerformGets(); + } else if (m_WriterMarshalMethod == SstMarshalBP) { std::vector sstReadHandlers; @@ -631,6 +724,10 @@ void SstReader::DoClose(const int transportIndex) { SstReaderClose(m_Input); } { \ return m_BP3Deserializer->BlocksInfo(variable, 0); \ } \ + else if (m_WriterMarshalMethod == SstMarshalBP5) \ + { \ + return m_BP5Deserializer->BlocksInfo(variable, 0); \ + } \ throw std::invalid_argument( \ "ERROR: Unknown marshal mechanism in DoBlocksInfo\n"); \ } diff --git a/source/adios2/engine/sst/SstReader.h b/source/adios2/engine/sst/SstReader.h index 3931c6b963..45359688b8 100644 --- a/source/adios2/engine/sst/SstReader.h +++ b/source/adios2/engine/sst/SstReader.h @@ -17,6 +17,7 @@ #include "adios2/core/IO.h" #include "adios2/helper/adiosComm.h" #include "adios2/toolkit/format/bp/bp3/BP3Deserializer.h" +#include "adios2/toolkit/format/bp5/BP5Deserializer.h" namespace adios2 { @@ -63,15 +64,18 @@ class SstReader : public Engine template void SstBPPerformGets(); + void BP5PerformGets(); void Init(); SstStream m_Input; SstMarshalMethod m_WriterMarshalMethod; + int m_WriterIsRowMajor; bool m_DefinitionsNotified = false; bool m_BetweenStepPairs = false; /* --- Used only with BP marshaling --- */ SstFullMetadata m_CurrentStepMetaData = NULL; format::BP3Deserializer *m_BP3Deserializer; + format::BP5Deserializer *m_BP5Deserializer = nullptr; /* --- Used only with BP marshaling --- */ struct _SstParams Params; diff --git a/source/adios2/engine/sst/SstWriter.cpp b/source/adios2/engine/sst/SstWriter.cpp index 3bcb6e6f4f..9320cfa10a 100644 --- a/source/adios2/engine/sst/SstWriter.cpp +++ b/source/adios2/engine/sst/SstWriter.cpp @@ -147,6 +147,17 @@ StepStatus SstWriter::BeginStep(StepMode mode, const float timeout_sec) m_BP3Serializer->m_MetadataSet.TimeStep = 1; m_BP3Serializer->m_MetadataSet.CurrentStep = m_WriterStep; } + else if (Params.MarshalMethod == SstMarshalBP5) + { + 
m_BP5Serializer = + std::unique_ptr(new format::BP5Serializer()); + m_BP5Serializer->m_Engine = this; + // m_BP5Serializer->Init(m_IO.m_Parameters, + // "in call to BP5::Open for writing", + // "sst"); + // m_BP5Serializer->m_MetadataSet.TimeStep = 1; + // m_BP5Serializer->m_MetadataSet.CurrentStep = m_WriterStep; + } else { // unknown marshaling method, shouldn't happen @@ -154,7 +165,7 @@ StepStatus SstWriter::BeginStep(StepMode mode, const float timeout_sec) return StepStatus::OK; } -void SstWriter::FFSMarshalAttributes() +void SstWriter::MarshalAttributes() { TAU_SCOPED_TIMER_FUNC(); const auto &attributes = m_IO.GetAttributes(); @@ -162,7 +173,7 @@ void SstWriter::FFSMarshalAttributes() const uint32_t attributesCount = static_cast(attributes.size()); // if there are no new attributes, nothing to do - if (attributesCount == m_FFSMarshaledAttributesCount) + if (attributesCount == m_MarshaledAttributesCount) return; for (const auto &attributePair : attributes) @@ -184,8 +195,14 @@ void SstWriter::FFSMarshalAttributes() // } - SstFFSMarshalAttribute(m_Output, name.c_str(), (int)type, - sizeof(char *), element_count, data_addr); + if (Params.MarshalMethod == SstMarshalFFS) + SstFFSMarshalAttribute(m_Output, name.c_str(), (int)type, + sizeof(char *), element_count, + data_addr); + else if (Params.MarshalMethod == SstMarshalBP5) + m_BP5Serializer->MarshalAttribute(name.c_str(), type, + sizeof(char *), element_count, + data_addr); } #define declare_type(T) \ else if (type == helper::GetDataType()) \ @@ -198,13 +215,20 @@ void SstWriter::FFSMarshalAttributes() element_count = attribute.m_Elements; \ data_addr = attribute.m_DataArray.data(); \ } \ - SstFFSMarshalAttribute(m_Output, attribute.m_Name.c_str(), (int)type, \ - sizeof(T), element_count, data_addr); \ + if (Params.MarshalMethod == SstMarshalFFS) \ + SstFFSMarshalAttribute(m_Output, attribute.m_Name.c_str(), \ + (int)type, sizeof(T), element_count, \ + data_addr); \ + else if (Params.MarshalMethod == 
SstMarshalBP5) \ + m_BP5Serializer->MarshalAttribute(attribute.m_Name.c_str(), type, \ + sizeof(T), element_count, \ + data_addr); \ } ADIOS2_FOREACH_ATTRIBUTE_PRIMITIVE_STDTYPE_1ARG(declare_type) #undef declare_type } + m_MarshaledAttributesCount = attributesCount; } void SstWriter::EndStep() @@ -225,10 +249,57 @@ void SstWriter::EndStep() { TAU_SCOPED_TIMER("Marshaling Overhead"); TAU_START("SstMarshalFFS"); - FFSMarshalAttributes(); + MarshalAttributes(); TAU_STOP("SstMarshalFFS"); SstFFSWriterEndStep(m_Output, m_WriterStep); } + else if (Params.MarshalMethod == SstMarshalBP5) + { + MarshalAttributes(); + auto TSInfo = m_BP5Serializer->CloseTimestep(m_WriterStep); + auto lf_FreeBlocks = [](void *vBlock) { + BP5DataBlock *BlockToFree = + reinterpret_cast<BP5DataBlock *>(vBlock); + // vBlock is the BP5DataBlock allocated as newblock below, so it + // must be deleted through that type (not BP3DataBlock). + delete BlockToFree; + }; + + BP5DataBlock *newblock = new BP5DataBlock; + SstMetaMetaList MetaMetaBlocks = (SstMetaMetaList)malloc( + (TSInfo.NewMetaMetaBlocks.size() + 1) * sizeof(MetaMetaBlocks[0])); + int i = 0; + for (const auto &MM : TSInfo.NewMetaMetaBlocks) + { + MetaMetaBlocks[i].BlockData = MM.MetaMetaInfo; + MetaMetaBlocks[i].BlockSize = MM.MetaMetaInfoLen; + MetaMetaBlocks[i].ID = MM.MetaMetaID; + MetaMetaBlocks[i].IDSize = MM.MetaMetaIDLen; + i++; + } + MetaMetaBlocks[TSInfo.NewMetaMetaBlocks.size()] = {NULL, 0, NULL, 0}; + newblock->metadata.DataSize = TSInfo.MetaEncodeBuffer->m_FixedSize; + newblock->metadata.block = TSInfo.MetaEncodeBuffer->Data(); + newblock->data.DataSize = TSInfo.DataBuffer->DataVec()[0].iov_len; + newblock->data.block = (char *)TSInfo.DataBuffer->DataVec()[0].iov_base; + if (TSInfo.AttributeEncodeBuffer) + { + newblock->attribute_data.DataSize = + TSInfo.AttributeEncodeBuffer->m_FixedSize; + newblock->attribute_data.block = + TSInfo.AttributeEncodeBuffer->Data(); + } + else + { + newblock->attribute_data.DataSize = 0; + newblock->attribute_data.block = NULL; + }
+ TAU_STOP("Marshaling overhead"); + SstProvideTimestepMM(m_Output, &newblock->metadata, &newblock->data, + m_WriterStep, lf_FreeBlocks, newblock, + &newblock->attribute_data, NULL, newblock, + MetaMetaBlocks); + } else if (Params.MarshalMethod == SstMarshalBP) { // This should finalize BP marshaling at the writer side. All diff --git a/source/adios2/engine/sst/SstWriter.h b/source/adios2/engine/sst/SstWriter.h index 4e1099dfa4..c5191c978e 100644 --- a/source/adios2/engine/sst/SstWriter.h +++ b/source/adios2/engine/sst/SstWriter.h @@ -15,6 +15,7 @@ #include "adios2/core/Engine.h" #include "adios2/helper/adiosComm.h" #include "adios2/toolkit/format/bp/bp3/BP3Serializer.h" +#include "adios2/toolkit/format/bp5/BP5Serializer.h" #include "adios2/toolkit/sst/sst.h" #include @@ -63,16 +64,24 @@ class SstWriter : public Engine _SstData metadata; format::BP3Serializer *serializer; }; + struct BP5DataBlock + { + _SstData data; + _SstData metadata; + _SstData attribute_data; + }; std::unique_ptr m_BP3Serializer; + std::unique_ptr m_BP5Serializer; + SstStream m_Output; long m_WriterStep = -1; bool m_BetweenStepPairs = false; bool m_DefinitionsNotified = false; - size_t m_FFSMarshaledAttributesCount = 0; + size_t m_MarshaledAttributesCount = 0; struct _SstParams Params; - void FFSMarshalAttributes(); + void MarshalAttributes(); void DoClose(const int transportIndex = -1) final; }; diff --git a/source/adios2/engine/sst/SstWriter.tcc b/source/adios2/engine/sst/SstWriter.tcc index 0d4f2bb3eb..35ef060940 100644 --- a/source/adios2/engine/sst/SstWriter.tcc +++ b/source/adios2/engine/sst/SstWriter.tcc @@ -36,7 +36,8 @@ void SstWriter::PutSyncCommon(Variable &variable, const T *values) "BeginStep/EndStep pairs"); } - if (Params.MarshalMethod == SstMarshalFFS) + if ((Params.MarshalMethod == SstMarshalFFS) || + (Params.MarshalMethod == SstMarshalBP5)) { size_t *Shape = NULL; size_t *Start = NULL; @@ -55,9 +56,19 @@ void SstWriter::PutSyncCommon(Variable &variable, const T *values) 
DimCount = variable.m_Count.size(); Count = variable.m_Count.data(); } - SstFFSMarshal(m_Output, (void *)&variable, variable.m_Name.c_str(), - (int)variable.m_Type, variable.m_ElementSize, DimCount, - Shape, Count, Start, values); + if (Params.MarshalMethod == SstMarshalFFS) + { + SstFFSMarshal(m_Output, (void *)&variable, variable.m_Name.c_str(), + (int)variable.m_Type, variable.m_ElementSize, + DimCount, Shape, Count, Start, values); + } + else + { + m_BP5Serializer->Marshal((void *)&variable, variable.m_Name.c_str(), + variable.m_Type, variable.m_ElementSize, + DimCount, Shape, Count, Start, values, + true); + } } else if (Params.MarshalMethod == SstMarshalBP) { diff --git a/source/adios2/toolkit/format/bp5/BP5Base.cpp b/source/adios2/toolkit/format/bp5/BP5Base.cpp new file mode 100644 index 0000000000..5e946d2b67 --- /dev/null +++ b/source/adios2/toolkit/format/bp5/BP5Base.cpp @@ -0,0 +1,58 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. 
+ * + * BP5Base.cpp + * + */ + +#include "adios2/core/Attribute.h" +#include "adios2/core/Engine.h" +#include "adios2/core/IO.h" + +#include "BP5Base.h" + +#include + +#ifdef _WIN32 +#pragma warning(disable : 4250) +#endif + +namespace adios2 +{ +namespace format +{ + +void BP5Base::FFSBitfieldSet(struct FFSMetadataInfoStruct *MBase, int Bit) +{ + int Element = Bit / (sizeof(size_t) * 8); + int ElementBit = Bit % (sizeof(size_t) * 8); + if (Element >= MBase->BitFieldCount) + { + MBase->BitField = + (size_t *)realloc(MBase->BitField, sizeof(size_t) * (Element + 1)); + memset(MBase->BitField + MBase->BitFieldCount, 0, + (Element - MBase->BitFieldCount + 1) * sizeof(size_t)); + MBase->BitFieldCount = Element + 1; + } + MBase->BitField[Element] |= ((size_t)1 << ElementBit); +} + +int BP5Base::FFSBitfieldTest(struct FFSMetadataInfoStruct *MBase, int Bit) +{ + int Element = Bit / (sizeof(size_t) * 8); + int ElementBit = Bit % (sizeof(size_t) * 8); + if (Element >= MBase->BitFieldCount) + { + // A bit beyond the stored words was never set. Report it as + // clear instead of growing the bitfield: a test must not + // reallocate or otherwise modify the caller's buffer. + // (Masks below are size_t wide; ElementBit can exceed 31.) + return 0; + } + return ((MBase->BitField[Element] & ((size_t)1 << ElementBit)) == + ((size_t)1 << ElementBit)); +} + +} +} diff --git a/source/adios2/toolkit/format/bp5/BP5Base.h b/source/adios2/toolkit/format/bp5/BP5Base.h new file mode 100644 index 0000000000..3767d705c0 --- /dev/null +++ b/source/adios2/toolkit/format/bp5/BP5Base.h @@ -0,0 +1,62 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details.
+ * + * BP5Base.h + */ + +#ifndef ADIOS2_TOOLKIT_FORMAT_BP5_BP5BASE_H_ +#define ADIOS2_TOOLKIT_FORMAT_BP5_BP5BASE_H_ + +#include "adios2/core/Attribute.h" +#include "adios2/core/IO.h" +#include "adios2/toolkit/format/buffer/BufferV.h" +#include "adios2/toolkit/format/buffer/heap/BufferSTL.h" +#include "atl.h" +#include "ffs.h" +#include "fm.h" +#ifdef _WIN32 +#pragma warning(disable : 4250) +#endif + +namespace adios2 +{ +namespace format +{ + +class BP5Base +{ +public: + struct MetaMetaInfoBlock + { + char *MetaMetaInfo; + size_t MetaMetaInfoLen; + char *MetaMetaID; + size_t MetaMetaIDLen; + }; + + typedef struct _MetaArrayRec + { + size_t Dims; // How many dimensions does this array have + size_t BlockCount; // How many blocks are written + size_t DBCount; // Dimens * BlockCount + size_t *Shape; // Global dimensionality [Dims] NULL for local + size_t *Count; // Per-block Counts [DBCount] + size_t *Offsets; // Per-block Offsets [DBCount] NULL for local + size_t *DataLocation; + } MetaArrayRec; + + struct FFSMetadataInfoStruct + { + size_t BitFieldCount; + size_t *BitField; + size_t DataBlockSize; + }; + + void FFSBitfieldSet(struct FFSMetadataInfoStruct *MBase, int Bit); + int FFSBitfieldTest(struct FFSMetadataInfoStruct *MBase, int Bit); +}; +} // end namespace format +} // end namespace adios2 + +#endif /* ADIOS2_UTILITIES_FORMAT_B5_BP5Base_H_ */ diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp new file mode 100644 index 0000000000..fe460235ea --- /dev/null +++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp @@ -0,0 +1,1022 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. 
+ * + * BP5Deserializer.cpp + * + */ + +#include "adios2/core/Attribute.h" +#include "adios2/core/Engine.h" +#include "adios2/core/IO.h" + +#include "BP5Deserializer.h" +#include "BP5Deserializer.tcc" + +#include + +#ifdef _WIN32 +#pragma warning(disable : 4250) +#endif + +namespace adios2 +{ +namespace format +{ +void BP5Deserializer::InstallMetaMetaData(MetaMetaInfoBlock &MM) +{ + char *FormatID = (char *)malloc(MM.MetaMetaIDLen); + char *MetaMetaInfo = (char *)malloc(MM.MetaMetaInfoLen); + memcpy(FormatID, MM.MetaMetaID, MM.MetaMetaIDLen); + memcpy(MetaMetaInfo, MM.MetaMetaInfo, MM.MetaMetaInfoLen); + load_external_format_FMcontext(FMContext_from_FFS(ReaderFFSContext), + FormatID, MM.MetaMetaIDLen, MetaMetaInfo); + free(FormatID); +} + +BP5Deserializer::ControlInfo *BP5Deserializer::GetPriorControl(FMFormat Format) +{ + struct ControlInfo *tmp = ControlBlocks; + while (tmp) + { + if (tmp->Format == Format) + { + return tmp; + } + tmp = tmp->Next; + } + return NULL; +} + +bool BP5Deserializer::NameIndicatesArray(const char *Name) +{ + size_t Len = strlen(Name); // may be shorter than the "Dims" suffix + return (Len >= 4) && (strcmp("Dims", Name + Len - 4) == 0); +} + +DataType BP5Deserializer::TranslateFFSType2ADIOS(const char *Type, int size) +{ + if (strcmp(Type, "integer") == 0) + { + if (size == 1) + { + return DataType::Int8; + } + else if (size == 2) + { + return DataType::Int16; + } + else if (size == 4) + { + return DataType::Int32; + } + else if (size == 8) + { + return DataType::Int64; + } + } + else if (strcmp(Type, "unsigned integer") == 0) + { + if (size == 1) + { + return DataType::UInt8; + } + else if (size == 2) + { + return DataType::UInt16; + } + else if (size == 4) + { + return DataType::UInt32; + } + else if (size == 8) + { + return DataType::UInt64; + } + } + else if ((strcmp(Type, "double") == 0) || (strcmp(Type, "float") == 0)) + { + if (size == sizeof(float)) + { + return DataType::Float; + } + else if ((sizeof(long double) != sizeof(double)) && + (size == sizeof(long double))) + { + return
DataType::Double; + } + else + { + return DataType::Double; + } + } + else if (strcmp(Type, "complex4") == 0) + { + return DataType::FloatComplex; + } + else if (strcmp(Type, "complex8") == 0) + { + return DataType::DoubleComplex; + } + return DataType::None; +} + +void BP5Deserializer::BreakdownVarName(const char *Name, char **base_name_p, + DataType *type_p, int *element_size_p) +{ + int Type; + int ElementSize; + const char *NameStart = strchr(strchr(Name, '_') + 1, '_') + 1; + sscanf(Name, "SST%d_%d_", &ElementSize, &Type); + *element_size_p = ElementSize; + *type_p = (DataType)Type; + *base_name_p = strdup(NameStart); +} + +void BP5Deserializer::BreakdownArrayName(const char *Name, char **base_name_p, + DataType *type_p, int *element_size_p) +{ + int Type; + int ElementSize; + const char *NameStart = strchr(strchr(Name, '_') + 1, '_') + 1; + sscanf(Name, "SST%d_%d_", &ElementSize, &Type); + *element_size_p = ElementSize; + *type_p = (DataType)Type; + *base_name_p = strdup(NameStart); + (*base_name_p)[strlen(*base_name_p) - 4] = 0; // kill "Dims" +} + +BP5Deserializer::BP5VarRec *BP5Deserializer::LookupVarByKey(void *Key) +{ + auto ret = VarByKey[Key]; + return ret; +} + +BP5Deserializer::BP5VarRec *BP5Deserializer::LookupVarByName(const char *Name) +{ + auto ret = VarByName[Name]; + return ret; +} + +BP5Deserializer::BP5VarRec *BP5Deserializer::CreateVarRec(const char *ArrayName) +{ + BP5VarRec *Ret = new BP5VarRec(m_WriterCohortSize); + Ret->VarName = strdup(ArrayName); + Ret->Variable = nullptr; + VarByName[Ret->VarName] = Ret; + return Ret; +} + +BP5Deserializer::ControlInfo *BP5Deserializer::BuildControl(FMFormat Format) +{ + FMStructDescList FormatList = format_list_of_FMFormat(Format); + FMFieldList FieldList = FormatList[0].field_list; + while (strncmp(FieldList->field_name, "BitField", 8) == 0) + FieldList++; + while (FieldList->field_name && + (strncmp(FieldList->field_name, "DataBlockSize", 8) == 0)) + FieldList++; + int i = 0; + int ControlCount = 
0; + ControlInfo *ret = (BP5Deserializer::ControlInfo *)malloc(sizeof(*ret)); + ret->Format = Format; + while (FieldList[i].field_name) + { + ret = (ControlInfo *)realloc( + ret, sizeof(*ret) + ControlCount * sizeof(struct ControlInfo)); + struct ControlStruct *C = &(ret->Controls[ControlCount]); + ControlCount++; + + C->FieldIndex = i; + C->FieldOffset = FieldList[i].field_offset; + + if (NameIndicatesArray(FieldList[i].field_name)) + { + char *ArrayName; + DataType Type; + BP5VarRec *VarRec = nullptr; + int ElementSize; + C->IsArray = 1; + BreakdownArrayName(FieldList[i].field_name, &ArrayName, &Type, + &ElementSize); + // if (WriterRank != 0) + // { + VarRec = LookupVarByName(ArrayName); + // } + if (!VarRec) + { + VarRec = CreateVarRec(ArrayName); + VarRec->Type = Type; + VarRec->ElementSize = ElementSize; + C->ElementSize = ElementSize; + } + i += 7; // number of fields in MetaArrayRec + free(ArrayName); + C->VarRec = VarRec; + } + else + { + /* simple field */ + char *FieldName = strdup(FieldList[i].field_name + 4); // skip SST_ + BP5VarRec *VarRec = NULL; + C->IsArray = 0; + VarRec = LookupVarByName(FieldName); + if (!VarRec) + { + DataType Type = TranslateFFSType2ADIOS(FieldList[i].field_type, + FieldList[i].field_size); + VarRec = CreateVarRec(FieldName); + VarRec->DimCount = 0; + C->Type = Type; + VarRec->Type = Type; + } + VarRec->ElementSize = FieldList[i].field_size; + C->ElementSize = FieldList[i].field_size; + C->VarRec = VarRec; + free(FieldName); + i++; + } + } + ret->ControlCount = ControlCount; + ret->Next = ControlBlocks; + ControlBlocks = ret; + return ret; +} + +void BP5Deserializer::ReverseDimensions(size_t *Dimensions, int count) +{ + for (int i = 0; i < count / 2; i++) + { + size_t tmp = Dimensions[i]; + Dimensions[i] = Dimensions[count - i - 1]; + Dimensions[count - i - 1] = tmp; + } +} + +void *BP5Deserializer::VarSetup(core::Engine *engine, const char *variableName, + const DataType Type, void *data) +{ + if (Type == 
adios2::DataType::Compound) + { + return (void *)NULL; + } +#define declare_type(T) \ + else if (Type == helper::GetDataType()) \ + { \ + core::Variable *variable = \ + &(engine->m_IO.DefineVariable(variableName)); \ + variable->SetData((T *)data); \ + variable->m_AvailableStepsCount = 1; \ + return (void *)variable; \ + } + + ADIOS2_FOREACH_STDTYPE_1ARG(declare_type) +#undef declare_type + + return (void *)NULL; +}; + +void *BP5Deserializer::ArrayVarSetup(core::Engine *engine, + const char *variableName, + const DataType type, int DimCount, + size_t *Shape, size_t *Start, + size_t *Count) +{ + std::vector VecShape; + std::vector VecStart; + std::vector VecCount; + adios2::DataType Type = (adios2::DataType)type; + /* + * setup shape of array variable as global (I.E. Count == Shape, + * Start == 0) + */ + if (Shape) + { + for (int i = 0; i < DimCount; i++) + { + VecShape.push_back(Shape[i]); + VecStart.push_back(0); + VecCount.push_back(Shape[i]); + } + } + else + { + VecShape = {}; + VecStart = {}; + for (int i = 0; i < DimCount; i++) + { + VecCount.push_back(Count[i]); + } + } + + if (Type == adios2::DataType::Compound) + { + return (void *)NULL; + } +#define declare_type(T) \ + else if (Type == helper::GetDataType()) \ + { \ + core::Variable *variable = &(engine->m_IO.DefineVariable( \ + variableName, VecShape, VecStart, VecCount)); \ + variable->m_AvailableStepsCount = 1; \ + return (void *)variable; \ + } + ADIOS2_FOREACH_STDTYPE_1ARG(declare_type) +#undef declare_type + return (void *)NULL; +}; + +void BP5Deserializer::SetupForTimestep(size_t Timestep) +{ + CurTimestep = Timestep; + PendingRequests.clear(); + for (auto RecPair : VarByKey) + { + RecPair.second->Variable = NULL; + } +} +void BP5Deserializer::InstallMetaData(void *MetadataBlock, size_t BlockLen, + size_t WriterRank) +{ + FFSTypeHandle FFSformat; + void *BaseData; + static int DumpMetadata = -1; + FFSformat = + FFSTypeHandle_from_encode(ReaderFFSContext, (char *)MetadataBlock); + if 
(!FFShas_conversion(FFSformat)) + { + FMContext FMC = FMContext_from_FFS(ReaderFFSContext); + FMFormat Format = FMformat_from_ID(FMC, (char *)MetadataBlock); + FMStructDescList List = + FMcopy_struct_list(format_list_of_FMFormat(Format)); + FMlocalize_structs(List); + establish_conversion(ReaderFFSContext, FFSformat, List); + FMfree_struct_list(List); + } + if (FFSdecode_in_place_possible(FFSformat)) + { + FFSdecode_in_place(ReaderFFSContext, (char *)MetadataBlock, &BaseData); + } + else + { + int DecodedLength = FFS_est_decode_length( + ReaderFFSContext, (char *)MetadataBlock, BlockLen); + BaseData = malloc(DecodedLength); + FFSdecode_to_buffer(ReaderFFSContext, (char *)MetadataBlock, BaseData); + } + if (DumpMetadata == -1) + { + DumpMetadata = (getenv("BP5DumpMetadata") != NULL); + } + if (DumpMetadata && (WriterRank == 0)) + { + printf("\nIncomingMetadatablock from WriterRank %d is %p :\n", + (int)WriterRank, BaseData); + FMdump_data(FMFormat_of_original(FFSformat), BaseData, 1024000); + printf("\n\n"); + } + struct ControlInfo *Control; + struct ControlStruct *ControlArray; + Control = GetPriorControl(FMFormat_of_original(FFSformat)); + if (!Control) + { + Control = BuildControl(FMFormat_of_original(FFSformat)); + } + ControlArray = &Control->Controls[0]; + ActiveControl[WriterRank] = Control; + + MetadataBaseAddrs[WriterRank] = BaseData; + for (int i = 0; i < Control->ControlCount; i++) + { + int FieldOffset = ControlArray[i].FieldOffset; + BP5VarRec *VarRec = ControlArray[i].VarRec; + void *field_data = (char *)BaseData + FieldOffset; + if (!FFSBitfieldTest((FFSMetadataInfoStruct *)BaseData, i)) + { + continue; + } + if (ControlArray[i].IsArray) + { + MetaArrayRec *meta_base = (MetaArrayRec *)field_data; + if ((meta_base->Dims > 1) && + (m_WriterIsRowMajor != m_ReaderIsRowMajor)) + { + /* if we're getting data from someone of the other array gender, + * switcheroo */ + ReverseDimensions(meta_base->Shape, meta_base->Dims); + 
ReverseDimensions(meta_base->Count, meta_base->Dims); + ReverseDimensions(meta_base->Offsets, meta_base->Dims); + } + if (WriterRank == 0) + { + VarRec->GlobalDims = meta_base->Shape; + } + if (!VarRec->Variable) + { + VarRec->Variable = ArrayVarSetup( + m_Engine, VarRec->VarName, VarRec->Type, meta_base->Dims, + meta_base->Shape, meta_base->Offsets, meta_base->Count); + VarByKey[VarRec->Variable] = VarRec; + } + VarRec->DimCount = meta_base->Dims; + VarRec->PerWriterBlockCount[WriterRank] = + meta_base->Dims ? meta_base->DBCount / meta_base->Dims : 1; + VarRec->PerWriterStart[WriterRank] = meta_base->Offsets; + VarRec->PerWriterCounts[WriterRank] = meta_base->Count; + VarRec->PerWriterDataLocation[WriterRank] = meta_base->DataLocation; + if (WriterRank == 0) + { + VarRec->PerWriterBlockStart[WriterRank] = 0; + } + if (WriterRank < m_WriterCohortSize - 1) + { + VarRec->PerWriterBlockStart[WriterRank + 1] = + VarRec->PerWriterBlockStart[WriterRank] + + VarRec->PerWriterBlockCount[WriterRank]; + } +#ifdef NOTDEF + // needs to be replaced with Simple Blocks Info + for (int i = 0; i < VarRec->PerWriterBlockCount[WriterRank]; i++) + { + size_t *Offsets = NULL; + if (meta_base->Offsets) + Offsets = meta_base->Offsets + (i * meta_base->Dims); + ArrayBlocksInfoUpcall(m_Engine, VarRec->Variable, VarRec->Type, + WriterRank, meta_base->Dims, + meta_base->Shape, Offsets, + meta_base->Count); + } +#endif + } + else + { + if (!VarRec->Variable) + { + VarRec->Variable = VarSetup(m_Engine, VarRec->VarName, + VarRec->Type, field_data); + VarByKey[VarRec->Variable] = VarRec; + } + VarRec->PerWriterMetaFieldOffset[WriterRank] = FieldOffset; + } + } +} + +void BP5Deserializer::InstallAttributeData(void *AttributeBlock, + size_t BlockLen) +{ + static int DumpMetadata = -1; + m_Engine->m_IO.RemoveAllAttributes(); + FMFieldList FieldList; + FMStructDescList FormatList; + void *BaseData; + FFSTypeHandle FFSformat; + + if (BlockLen == 0) + return; + + FFSformat = + 
FFSTypeHandle_from_encode(ReaderFFSContext, (char *)AttributeBlock); + if (!FFShas_conversion(FFSformat)) + { + FMContext FMC = FMContext_from_FFS(ReaderFFSContext); + FMFormat Format = FMformat_from_ID(FMC, (char *)AttributeBlock); + FMStructDescList List = + FMcopy_struct_list(format_list_of_FMFormat(Format)); + FMlocalize_structs(List); + establish_conversion(ReaderFFSContext, FFSformat, List); + FMfree_struct_list(List); + } + + if (FFSdecode_in_place_possible(FFSformat)) + { + FFSdecode_in_place(ReaderFFSContext, (char *)AttributeBlock, &BaseData); + } + else + { + int DecodedLength = FFS_est_decode_length( + ReaderFFSContext, (char *)AttributeBlock, BlockLen); + BaseData = malloc(DecodedLength); + FFSBuffer decode_buf = + create_fixed_FFSBuffer((char *)BaseData, DecodedLength); + FFSdecode_to_buffer(ReaderFFSContext, (char *)AttributeBlock, + decode_buf); + } + if (DumpMetadata == -1) + { + DumpMetadata = (getenv("SstDumpMetadata") != NULL); + } + if (DumpMetadata) + { + printf("\nIncomingAttributeDatablock is %p :\n", BaseData); + FMdump_data(FMFormat_of_original(FFSformat), BaseData, 1024000); + printf("\n\n"); + } + FormatList = format_list_of_FMFormat(FMFormat_of_original(FFSformat)); + FieldList = FormatList[0].field_list; + int i = 0; + while (FieldList[i].field_name) + { + char *FieldName; + void *field_data = (char *)BaseData + FieldList[i].field_offset; + + DataType Type; + int ElemSize; + BreakdownVarName(FieldList[i].field_name, &FieldName, &Type, &ElemSize); + if (Type == adios2::DataType::Compound) + { + return; + } + else if (Type == helper::GetDataType()) + { + m_Engine->m_IO.DefineAttribute(FieldName, + *(char **)field_data); + } +#define declare_type(T) \ + else if (Type == helper::GetDataType()) \ + { \ + m_Engine->m_IO.DefineAttribute(FieldName, *(T *)field_data); \ + } + + ADIOS2_FOREACH_ATTRIBUTE_PRIMITIVE_STDTYPE_1ARG(declare_type) +#undef declare_type + else + { + std::cout << "Loading attribute matched no type " << ToString(Type) + << 
std::endl;
+        }
+        free(FieldName);
+        i++;
+    }
+}
+
+/*
+ * Service a Get for one variable.
+ *
+ * A single-value variable is answered immediately: m_ElementSize bytes are
+ * copied into DestData from the installed metadata block of the selected
+ * writer (rank 0, or m_BlockID when the selection is WriteBlock).
+ * An array variable is not read here; a BP5ArrayRequest describing the
+ * selection is appended to PendingRequests and is satisfied later by
+ * GenerateReadRequests() / FinalizeGets().
+ *
+ * Returns true when a request was queued (data still has to be fetched) and
+ * false when DestData was filled directly or nothing was queued, which is the
+ * contract documented on the declaration in BP5Deserializer.h.
+ */
+bool BP5Deserializer::QueueGet(core::VariableBase &variable, void *DestData)
+{
+    // NOTE(review): operator[] default-inserts an empty record pointer for a
+    // variable that was never registered; callers are presumed to pass only
+    // variables created by this deserializer.
+    BP5VarRec *VarRec = VarByKey[&variable];
+
+    if (variable.m_SingleValue)
+    {
+        int WriterRank = 0;
+        if (variable.m_SelectionType == adios2::SelectionType::WriteBlock)
+            WriterRank = variable.m_BlockID;
+
+        char *src = ((char *)MetadataBaseAddrs[WriterRank]) +
+                    VarRec->PerWriterMetaFieldOffset[WriterRank];
+        memcpy(DestData, src, variable.m_ElementSize);
+        return false;
+    }
+
+    BP5ArrayRequest Req;
+    Req.VarRec = VarRec;
+    Req.BlockID = variable.m_BlockID;
+    Req.Count = variable.m_Count;
+    Req.Data = DestData;
+    if (variable.m_SelectionType == adios2::SelectionType::BoundingBox)
+    {
+        Req.RequestType = Global;
+        Req.Start = variable.m_Start;
+    }
+    else if (variable.m_SelectionType == adios2::SelectionType::WriteBlock)
+    {
+        Req.RequestType = Local;
+    }
+    else
+    {
+        // other selection types are not handled: nothing is queued
+        return false;
+    }
+    PendingRequests.push_back(Req);
+    return true;
+}
+
+/*
+ * Decide whether writer rank i holds data needed by Req.
+ *
+ * Local (WriteBlock) request: true when Req.BlockID lies in the block range
+ * [PerWriterBlockStart[i], PerWriterBlockStart[i] + PerWriterBlockCount[i]).
+ * Global (BoundingBox) request: true when the writer's block overlaps the
+ * selection in every dimension.
+ */
+bool BP5Deserializer::NeedWriter(BP5ArrayRequest Req, int i)
+{
+    if (Req.RequestType == Local)
+    {
+        const size_t NodeFirst = Req.VarRec->PerWriterBlockStart[i];
+        const size_t NodeCount = Req.VarRec->PerWriterBlockCount[i];
+        // Half-open range test.  The former "Count + First - 1" wrapped
+        // around for a writer that contributed zero blocks and then matched
+        // every BlockID >= First.
+        return (Req.BlockID >= NodeFirst) &&
+               (Req.BlockID - NodeFirst < NodeCount);
+    }
+    // else Global case
+    for (size_t j = 0; j < Req.VarRec->DimCount; j++)
+    {
+        if (Req.VarRec->PerWriterStart[i] == NULL)
+        /* this writer didn't write */
+        {
+            return false;
+        }
+        const size_t SelOffset = Req.Start[j];
+        const size_t SelSize = Req.Count[j];
+        const size_t RankOffset = Req.VarRec->PerWriterStart[i][j];
+        const size_t RankSize = Req.VarRec->PerWriterCounts[i][j];
+        if ((SelSize == 0) || (RankSize == 0))
+        {
+            return false;
+        }
+        // no overlap if the block ends before the selection starts or
+        // starts after the selection ends
+        if ((RankOffset + RankSize <= SelOffset) ||
+            (RankOffset >= SelOffset + SelSize))
+        {
+            return false;
+        }
+    }
+    return true;
+}
+
+/*
+ * Build one whole-data-block read request for every writer that at least
+ * one pending array request needs.
+ *
+ * Resets the per-writer status, marks the needed writers, and for each of
+ * them allocates (malloc) a destination buffer of DataBlockSize bytes as
+ * recorded in that writer's metadata.  Nothing in this function releases
+ * DestinationAddr; NOTE(review): presumably the caller frees it after
+ * FinalizeGets() - confirm.
+ */
+std::vector<BP5Deserializer::ReadRequest>
+BP5Deserializer::GenerateReadRequests()
+{
+    std::vector<BP5Deserializer::ReadRequest> Ret;
+    for (auto &W : WriterInfo)
+    {
+        W.Status = Empty;
+        W.RawBuffer = NULL;
+    }
+
+    for (const auto &Req : PendingRequests)
+    {
+        for (int i = 0; i < m_WriterCohortSize; i++)
+        {
+            if ((WriterInfo[i].Status != Needed) && (NeedWriter(Req, i)))
+            {
+                WriterInfo[i].Status = Needed;
+            }
+        }
+    }
+
+    for (int i = 0; i < m_WriterCohortSize; i++)
+    {
+        if (WriterInfo[i].Status == Needed)
+        {
+            ReadRequest RR;
+            RR.Timestep = CurTimestep;
+            RR.WriterRank = i;
+            RR.StartOffset = 0;
+            RR.ReadLength =
+                ((struct FFSMetadataInfoStruct *)MetadataBaseAddrs[i])
+                    ->DataBlockSize;
+            RR.DestinationAddr = (char *)malloc(RR.ReadLength);
+            RR.Internal = NULL;
+            Ret.push_back(RR);
+        }
+    }
+    return Ret;
+}
+
+/*
+ * Scatter the data fetched for Requests into the user buffers of all
+ * pending array requests, then clear PendingRequests.
+ *
+ * For every (request, writer) pair accepted by NeedWriter() the writer's
+ * fetched buffer is located in Requests and the overlapping region is
+ * copied with the row- or column-major extractor according to
+ * m_ReaderIsRowMajor.  A writer with no entry in Requests is skipped
+ * (previously the search ran past the end of the vector).
+ */
+void BP5Deserializer::FinalizeGets(std::vector<ReadRequest> Requests)
+{
+    for (const auto &Req : PendingRequests)
+    {
+        // ImplementGapWarning(Reqs);
+        for (int i = 0; i < m_WriterCohortSize; i++)
+        {
+            if (!NeedWriter(Req, i))
+            {
+                continue;
+            }
+            /* find the buffer that was read for this writer */
+            size_t ReqIndex = 0;
+            while ((ReqIndex < Requests.size()) &&
+                   (Requests[ReqIndex].WriterRank != static_cast<size_t>(i)))
+            {
+                ReqIndex++;
+            }
+            if (ReqIndex == Requests.size())
+            {
+                continue;
+            }
+
+            /* if needed this writer fill destination with acquired data */
+            const int ElementSize = Req.VarRec->ElementSize;
+            const size_t DimCount = Req.VarRec->DimCount;
+            size_t *GlobalDimensions = Req.VarRec->GlobalDims;
+            size_t *RankOffset = Req.VarRec->PerWriterStart[i];
+            const size_t *RankSize = Req.VarRec->PerWriterCounts[i];
+            std::vector<size_t> ZeroSel(DimCount);
+            std::vector<size_t> ZeroRankOffset(DimCount);
+            std::vector<size_t> ZeroGlobalDimensions(DimCount);
+            const size_t *SelOffset =
+                Req.Start.size() ? Req.Start.data() : NULL;
+            const size_t *SelSize = Req.Count.data();
+            char *IncomingData = (char *)Requests[ReqIndex].DestinationAddr +
+                                 Req.VarRec->PerWriterDataLocation[i][0];
+
+            if (Req.RequestType == Local)
+            {
+                /* a local block is treated as its own zero-based "global"
+                 * array; first skip over the writer's earlier blocks */
+                const size_t LocalBlockID =
+                    Req.BlockID - Req.VarRec->PerWriterBlockStart[i];
+                size_t DataOffset = 0;
+                for (size_t Block = 0; Block < LocalBlockID; Block++)
+                {
+                    size_t BlockElemCount = 1;
+                    for (size_t Dim = 0; Dim < DimCount; Dim++)
+                    {
+                        BlockElemCount *= RankSize[Dim];
+                    }
+                    DataOffset += BlockElemCount * ElementSize;
+                    RankSize += DimCount;
+                }
+                RankOffset = ZeroRankOffset.data();
+                GlobalDimensions = ZeroGlobalDimensions.data();
+                if (SelOffset == NULL)
+                {
+                    SelOffset = ZeroSel.data();
+                }
+                for (size_t Dim = 0; Dim < DimCount; Dim++)
+                {
+                    GlobalDimensions[Dim] = RankSize[Dim];
+                }
+                IncomingData = IncomingData + DataOffset;
+            }
+            if (m_ReaderIsRowMajor)
+            {
+                ExtractSelectionFromPartialRM(
+                    ElementSize, DimCount, GlobalDimensions, RankOffset,
+                    RankSize, SelOffset, SelSize, IncomingData,
+                    (char *)Req.Data);
+            }
+            else
+            {
+                ExtractSelectionFromPartialCM(
+                    ElementSize, DimCount, GlobalDimensions, RankOffset,
+                    RankSize, SelOffset, SelSize, IncomingData,
+                    (char *)Req.Data);
+            }
+        }
+    }
+    PendingRequests.clear();
+}
+
+/* LocalIndex[d] = GlobalIndex[d] - LocalOffsets[d] for each of Dims
+ * dimensions. */
+void BP5Deserializer::MapGlobalToLocalIndex(size_t Dims,
+                                            const size_t *GlobalIndex,
+                                            const size_t *LocalOffsets,
+                                            size_t *LocalIndex)
+{
+    for (size_t i = 0; i < Dims; i++)
+    {
+        LocalIndex[i] = GlobalIndex[i] - LocalOffsets[i];
+    }
+}
+
+/*
+ * Row-major linear element offset of Index inside an array of extents Size
+ * (last dimension varies fastest).  The return type is fixed to int by the
+ * declaration in BP5Deserializer.h; the sum is accumulated in size_t so the
+ * intermediate arithmetic itself cannot overflow a signed int.
+ */
+int BP5Deserializer::FindOffset(size_t Dims, const size_t *Size,
+                                const size_t *Index)
+{
+    size_t Offset = 0;
+    for (size_t i = 0; i < Dims; i++)
+    {
+        Offset = Index[i] + (Size[i] * Offset);
+    }
+    return static_cast<int>(Offset);
+}
+
+/* Column-major counterpart of FindOffset (first dimension varies fastest);
+ * returns the full-width offset. */
+static size_t FindOffsetCM(size_t Dims, const size_t *Size,
+                           const size_t *Index)
+{
+    size_t Offset = 0;
+    for (size_t i = Dims; i-- > 0;)
+    {
+        Offset = Index[i] + (Size[i] * Offset);
+    }
+    return Offset;
+}
+
+#define MAX(x, y) (((x) > (y)) ? (x) : (y))
+#define MIN(x, y) (((x) < (y)) ?
(x) : (y))
+
+/*
+ * Parameters of the two ExtractSelectionFromPartial* routines:
+ *  - ElementSize is the byte size of the array elements
+ *  - Dims is the number of dimensions in the variable
+ *  - GlobalDims is an array, Dims long, giving the size of each dimension
+ *  - PartialOffsets is an array, Dims long, giving the starting offsets per
+ *    dimension of this data block in the global array
+ *  - PartialCounts is an array, Dims long, giving the size per dimension
+ *    of this data block in the global array
+ *  - SelectionOffsets is an array, Dims long, giving the starting offsets in
+ *    the global array of the output selection.
+ *  - SelectionCounts is an array, Dims long, giving the size per dimension
+ *    of the output selection.
+ *  - InData is the input, a slab of the global array
+ *  - OutData is the output, to be filled with the selection array.
+ *
+ * Both routines copy the intersection of the data block and the selection
+ * as BlockCount contiguous runs of BlockSize elements, stepping a single
+ * source stride and a single destination stride between runs.
+ */
+// Row major version
+void BP5Deserializer::ExtractSelectionFromPartialRM(
+    int ElementSize, size_t Dims, const size_t *GlobalDims,
+    const size_t *PartialOffsets, const size_t *PartialCounts,
+    const size_t *SelectionOffsets, const size_t *SelectionCounts,
+    const char *InData, char *OutData)
+{
+    size_t BlockSize = 1;
+    size_t OperantDims = Dims;
+    size_t OperantElementSize = ElementSize;
+
+    /* Fold trailing dimensions that are complete in both the block and the
+     * selection into one contiguous run; the first incomplete dimension
+     * contributes only its overlapping extent. */
+    for (size_t Dim = Dims; Dim-- > 0;)
+    {
+        if ((GlobalDims[Dim] == PartialCounts[Dim]) &&
+            (SelectionCounts[Dim] == PartialCounts[Dim]))
+        {
+            BlockSize *= GlobalDims[Dim];
+            OperantDims--; /* last dimension doesn't matter, we got all and we
+                              want all */
+            OperantElementSize *= GlobalDims[Dim];
+        }
+        else
+        {
+            size_t Left = MAX(PartialOffsets[Dim], SelectionOffsets[Dim]);
+            size_t Right = MIN(PartialOffsets[Dim] + PartialCounts[Dim],
+                               SelectionOffsets[Dim] + SelectionCounts[Dim]);
+            BlockSize *= (Right - Left);
+            break;
+        }
+    }
+
+    size_t SourceBlockStride = 0;
+    size_t DestBlockStride = 0;
+    if (OperantDims > 0)
+    {
+        SourceBlockStride = PartialCounts[OperantDims - 1] * OperantElementSize;
+        DestBlockStride = SelectionCounts[OperantDims - 1] * OperantElementSize;
+    }
+
+    /* calculate first selected element and count */
+    size_t BlockCount = 1;
+    std::vector<size_t> FirstIndex(Dims);
+    for (size_t Dim = 0; Dim < Dims; Dim++)
+    {
+        size_t Left = MAX(PartialOffsets[Dim], SelectionOffsets[Dim]);
+        size_t Right = MIN(PartialOffsets[Dim] + PartialCounts[Dim],
+                           SelectionOffsets[Dim] + SelectionCounts[Dim]);
+        if (Right <= Left)
+        {
+            /* empty intersection: nothing to copy, and (Right - Left) would
+             * wrap around */
+            return;
+        }
+        /* "Dim + 1 < OperantDims" rather than "Dim < OperantDims - 1": with
+         * OperantDims == 0 (everything contiguous) the subtraction wrapped
+         * and the whole block was copied once per element */
+        if (Dim + 1 < OperantDims)
+        {
+            BlockCount *= (Right - Left);
+        }
+        FirstIndex[Dim] = Left;
+    }
+
+    std::vector<size_t> SelectionIndex(Dims);
+    MapGlobalToLocalIndex(Dims, FirstIndex.data(), SelectionOffsets,
+                          SelectionIndex.data());
+    size_t DestBlockStartOffset =
+        FindOffset(Dims, SelectionCounts, SelectionIndex.data());
+    DestBlockStartOffset *= ElementSize;
+
+    std::vector<size_t> PartialIndex(Dims);
+    MapGlobalToLocalIndex(Dims, FirstIndex.data(), PartialOffsets,
+                          PartialIndex.data());
+    size_t SourceBlockStartOffset =
+        FindOffset(Dims, PartialCounts, PartialIndex.data());
+    SourceBlockStartOffset *= ElementSize;
+
+    InData += SourceBlockStartOffset;
+    OutData += DestBlockStartOffset;
+    for (size_t i = 0; i < BlockCount; i++)
+    {
+        memcpy(OutData, InData, BlockSize * ElementSize);
+        InData += SourceBlockStride;
+        OutData += DestBlockStride;
+    }
+}
+
+// Column-major version
+void BP5Deserializer::ExtractSelectionFromPartialCM(
+    int ElementSize, size_t Dims, const size_t *GlobalDims,
+    const size_t *PartialOffsets, const size_t *PartialCounts,
+    const size_t *SelectionOffsets, const size_t *SelectionCounts,
+    const char *InData, char *OutData)
+{
+    /* All sizes and offsets are size_t: the former int arithmetic
+     * overflowed for blocks larger than INT_MAX bytes. */
+    size_t BlockSize = 1;
+    size_t OperantElementSize = ElementSize;
+
+    /* Fold leading dimensions that are complete in both the block and the
+     * selection into the element size and drop them from every array, so
+     * index 0 is always the first dimension still to be handled. */
+    while (Dims > 0)
+    {
+        if ((GlobalDims[0] == PartialCounts[0]) &&
+            (SelectionCounts[0] == PartialCounts[0]))
+        {
+            BlockSize *= GlobalDims[0];
+            OperantElementSize *= GlobalDims[0];
+            /* skip the first bit of everything */
+            GlobalDims++;
+            PartialOffsets++;
+            PartialCounts++;
+            SelectionOffsets++;
+            SelectionCounts++;
+            Dims--;
+        }
+        else
+        {
+            size_t Left = MAX(PartialOffsets[0], SelectionOffsets[0]);
+            size_t Right = MIN(PartialOffsets[0] + PartialCounts[0],
+                               SelectionOffsets[0] + SelectionCounts[0]);
+            BlockSize *= (Right - Left);
+            break;
+        }
+    }
+
+    size_t SourceBlockStride = 0;
+    size_t DestBlockStride = 0;
+    if (Dims > 0)
+    {
+        SourceBlockStride = PartialCounts[0] * OperantElementSize;
+        DestBlockStride = SelectionCounts[0] * OperantElementSize;
+    }
+
+    /* calculate first selected element and count */
+    size_t BlockCount = 1;
+    std::vector<size_t> FirstIndex(Dims);
+    for (size_t Dim = 0; Dim < Dims; Dim++)
+    {
+        size_t Left = MAX(PartialOffsets[Dim], SelectionOffsets[Dim]);
+        size_t Right = MIN(PartialOffsets[Dim] + PartialCounts[Dim],
+                           SelectionOffsets[Dim] + SelectionCounts[Dim]);
+        if (Right <= Left)
+        {
+            /* empty intersection: nothing to copy */
+            return;
+        }
+        if (Dim > 0)
+        {
+            BlockCount *= (Right - Left);
+        }
+        FirstIndex[Dim] = Left;
+    }
+
+    std::vector<size_t> SelectionIndex(Dims);
+    MapGlobalToLocalIndex(Dims, FirstIndex.data(), SelectionOffsets,
+                          SelectionIndex.data());
+    size_t DestBlockStartOffset =
+        FindOffsetCM(Dims, SelectionCounts, SelectionIndex.data());
+    DestBlockStartOffset *= OperantElementSize;
+
+    std::vector<size_t> PartialIndex(Dims);
+    MapGlobalToLocalIndex(Dims, FirstIndex.data(), PartialOffsets,
+                          PartialIndex.data());
+    size_t SourceBlockStartOffset =
+        FindOffsetCM(Dims, PartialCounts, PartialIndex.data());
+    SourceBlockStartOffset *= OperantElementSize;
+
+    InData += SourceBlockStartOffset;
+    OutData += DestBlockStartOffset;
+    for (size_t i = 0; i < BlockCount; i++)
+    {
+        memcpy(OutData, InData, BlockSize * ElementSize);
+        InData += SourceBlockStride;
+        OutData += DestBlockStride;
+    }
+}
+
+/*
+ * Create a deserializer for WriterCount writer ranks.
+ *
+ * Builds the FFS reader context from a temporary FM context and sizes every
+ * per-writer table to the cohort size.  The initializer list follows the
+ * member declaration order in BP5Deserializer.h.
+ */
+BP5Deserializer::BP5Deserializer(int WriterCount, bool WriterIsRowMajor,
+                                 bool ReaderIsRowMajor)
+: m_WriterIsRowMajor{WriterIsRowMajor}, m_ReaderIsRowMajor{ReaderIsRowMajor},
+  m_WriterCohortSize{WriterCount}
+{
+    FMContext Tmp = create_local_FMcontext();
+    ReaderFFSContext = create_FFSContext_FM(Tmp);
+    free_FMcontext(Tmp);
+    WriterInfo.resize(m_WriterCohortSize);
+    MetadataBaseAddrs.resize(m_WriterCohortSize);
+    MetadataFieldLists.resize(m_WriterCohortSize);
+    DataBaseAddrs.resize(m_WriterCohortSize);
+    ActiveControl.resize(m_WriterCohortSize);
+}
+
+/*
+ * Release the FFS context, any per-writer raw buffers and the linked list
+ * of control blocks.  Variable records are not released here (see the
+ * disabled loop below).
+ */
+BP5Deserializer::~BP5Deserializer()
+{
+    free_FFSContext(ReaderFFSContext);
+    for (int i = 0; i < m_WriterCohortSize; i++)
+    {
+        if (WriterInfo[i].RawBuffer)
+            free(WriterInfo[i].RawBuffer);
+    }
+    //    for (int i = 0; i < Info.VarCount; i++)
+    //    {
+    //        free(Info.VarList[i]->VarName);
+    //        free(Info.VarList[i]->PerWriterMetaFieldOffset);
+    //        free(Info.VarList[i]->PerWriterBlockCount);
+    //        free(Info.VarList[i]->PerWriterBlockStart);
+    //        free(Info.VarList[i]->PerWriterStart);
+    //        free(Info.VarList[i]->PerWriterCounts);
+    //        free(Info.VarList[i]->PerWriterIncomingData);
+    //        free(Info.VarList[i]->PerWriterIncomingSize);
+    //        free(Info.VarList[i]);
+    //    }
+    struct ControlInfo *tmp = ControlBlocks;
+    ControlBlocks = NULL;
+    while (tmp)
+    {
+        struct ControlInfo *next = tmp->Next;
+        free(tmp);
+        tmp = next;
+    }
+}
+
+/* Explicit instantiation of BlocksInfo for every standard ADIOS2 type. */
+#define declare_template_instantiation(T)                                      \
+                                                                               \
+    template std::vector<typename core::Variable<T>::BPInfo>                   \
+    BP5Deserializer::BlocksInfo(const core::Variable<T> &, const size_t)       \
+        const;
+ADIOS2_FOREACH_STDTYPE_1ARG(declare_template_instantiation)
+#undef declare_template_instantiation
+
+}
+
+}
diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.h b/source/adios2/toolkit/format/bp5/BP5Deserializer.h
new file mode 100644
index 0000000000..0503bea936
--- /dev/null
+++ 
b/source/adios2/toolkit/format/bp5/BP5Deserializer.h
@@ -0,0 +1,210 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * BP5Deserializer.h
+ *
+ */
+
+#ifndef ADIOS2_TOOLKIT_FORMAT_BP5_BP5DESERIALIZER_H_
+#define ADIOS2_TOOLKIT_FORMAT_BP5_BP5DESERIALIZER_H_
+
+#include "adios2/core/Attribute.h"
+#include "adios2/core/IO.h"
+#include "adios2/core/Variable.h"
+
+#include "BP5Base.h"
+#include "atl.h"
+#include "ffs.h"
+#include "fm.h"
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#ifdef _WIN32
+#pragma warning(disable : 4250)
+#endif
+
+namespace adios2
+{
+namespace format
+{
+
+/*
+ * Reader-side half of the BP5 format: installs FFS-encoded metadata and
+ * attribute blocks from a cohort of writers, queues Get requests and copies
+ * the fetched data blocks into user buffers.
+ *
+ * NOTE(review): the element types of the containers below were missing from
+ * the text this header was recovered from.  They are restored from how each
+ * member is used in BP5Deserializer.cpp/.tcc; the ones marked "presumed" are
+ * not exercised in the visible code and need checking against upstream.
+ */
+class BP5Deserializer : virtual public BP5Base
+{
+
+public:
+    BP5Deserializer(int WriterCount, bool WriterIsRowMajor,
+                    bool ReaderIsRowMajor);
+
+    ~BP5Deserializer();
+
+    /* One contiguous read of a writer's data block, produced by
+     * GenerateReadRequests() and consumed by FinalizeGets(). */
+    struct ReadRequest
+    {
+        size_t Timestep;
+        size_t WriterRank;
+        size_t StartOffset;
+        size_t ReadLength;
+        char *DestinationAddr;
+        void *Internal;
+    };
+    void InstallMetaMetaData(MetaMetaInfoBlock &MMList);
+    void InstallMetaData(void *MetadataBlock, size_t BlockLen,
+                         size_t WriterRank);
+    void InstallAttributeData(void *AttributeBlock, size_t BlockLen);
+    void SetupForTimestep(size_t t);
+    // return from QueueGet is true if a sync is needed to fill the data
+    bool QueueGet(core::VariableBase &variable, void *DestData);
+
+    std::vector<ReadRequest> GenerateReadRequests();
+    void FinalizeGets(std::vector<ReadRequest>);
+
+    bool m_WriterIsRowMajor = 1;
+    bool m_ReaderIsRowMajor = 1;
+    core::Engine *m_Engine = NULL;
+
+    template <class T>
+    std::vector<typename core::Variable<T>::BPInfo>
+    BlocksInfo(const core::Variable<T> &variable, const size_t step) const;
+
+private:
+    /* Everything known about one variable; the PerWriter* vectors are
+     * indexed by writer rank. */
+    struct BP5VarRec
+    {
+        void *Variable = NULL;
+        char *VarName = NULL;
+        size_t DimCount = 0;
+        DataType Type = DataType::None;
+        int ElementSize = 0;
+        size_t *GlobalDims = NULL;
+        std::vector<size_t> PerWriterMetaFieldOffset;
+        std::vector<size_t> PerWriterBlockStart;
+        std::vector<size_t> PerWriterBlockCount;
+        std::vector<size_t *> PerWriterStart;
+        std::vector<size_t *> PerWriterCounts;
+        std::vector<void *> PerWriterIncomingData; // presumed element type
+        std::vector<size_t> PerWriterIncomingSize; // important for compression
+        std::vector<size_t *> PerWriterDataLocation;
+        BP5VarRec(int WriterSize)
+        {
+            PerWriterMetaFieldOffset.resize(WriterSize);
+            PerWriterBlockStart.resize(WriterSize);
+            PerWriterBlockCount.resize(WriterSize);
+            PerWriterStart.resize(WriterSize);
+            PerWriterCounts.resize(WriterSize);
+            PerWriterIncomingData.resize(WriterSize);
+            PerWriterIncomingSize.resize(WriterSize);
+            PerWriterDataLocation.resize(WriterSize);
+        }
+    };
+
+    /* Maps one field of a metadata format to its variable record. */
+    struct ControlStruct
+    {
+        int FieldIndex;
+        int FieldOffset;
+        BP5VarRec *VarRec;
+        int IsArray;
+        DataType Type;
+        int ElementSize;
+    };
+
+    /* Per-format control block, kept in a singly linked list.  Controls is
+     * declared with one element and indexed beyond it, i.e. the block is
+     * allocated with room for ControlCount entries. */
+    struct ControlInfo
+    {
+        FMFormat Format;
+        int ControlCount;
+        struct ControlInfo *Next;
+        struct ControlStruct Controls[1];
+    };
+
+    enum WriterDataStatusEnum
+    {
+        Empty = 0,
+        Needed = 1,
+        Requested = 2,
+        Full = 3
+    };
+
+    struct FFSReaderPerWriterRec
+    {
+        enum WriterDataStatusEnum Status = Empty;
+        char *RawBuffer = NULL;
+    };
+
+    FFSContext ReaderFFSContext;
+    int m_WriterCohortSize;
+    std::unordered_map<std::string, BP5VarRec *> VarByName; // presumed types
+    std::unordered_map<void *, BP5VarRec *> VarByKey;
+    FMContext LocalFMContext;
+    // Ffsarrayrequest PendingVarRequests;
+
+    std::vector<void *> MetadataBaseAddrs;
+    std::vector<FMFieldList> MetadataFieldLists; // presumed element type
+    std::vector<void *> DataBaseAddrs;           // presumed element type
+    std::vector<FFSReaderPerWriterRec> WriterInfo;
+    //  struct ControlInfo *ControlBlocks;
+
+    ControlInfo *ControlBlocks = nullptr;
+    ControlInfo *GetPriorControl(FMFormat Format);
+    ControlInfo *BuildControl(FMFormat Format);
+    bool NameIndicatesArray(const char *Name);
+    DataType TranslateFFSType2ADIOS(const char *Type, int size);
+    BP5VarRec *LookupVarByKey(void *Key);
+    BP5VarRec *LookupVarByName(const char *Name);
+    BP5VarRec *CreateVarRec(const char *ArrayName);
+    void ReverseDimensions(size_t *Dimensions, int count);
+    void BreakdownVarName(const char *Name, char **base_name_p,
+                          DataType *type_p, int *element_size_p);
+    void BreakdownArrayName(const char *Name, char **base_name_p,
+                            DataType *type_p, int *element_size_p);
+    void *VarSetup(core::Engine *engine, const char *variableName,
+                   const DataType type, void *data);
+    void *ArrayVarSetup(core::Engine *engine, const char *variableName,
+                        const DataType type, int DimCount, size_t *Shape,
+                        size_t *Start, size_t *Count);
+    void MapGlobalToLocalIndex(size_t Dims, const size_t *GlobalIndex,
+                               const size_t *LocalOffsets, size_t *LocalIndex);
+    int FindOffset(size_t Dims, const size_t *Size, const size_t *Index);
+    void ExtractSelectionFromPartialRM(int ElementSize, size_t Dims,
+                                       const size_t *GlobalDims,
+                                       const size_t *PartialOffsets,
+                                       const size_t *PartialCounts,
+                                       const size_t *SelectionOffsets,
+                                       const size_t *SelectionCounts,
+                                       const char *InData, char *OutData);
+    void ExtractSelectionFromPartialCM(int ElementSize, size_t Dims,
+                                       const size_t *GlobalDims,
+                                       const size_t *PartialOffsets,
+                                       const size_t *PartialCounts,
+                                       const size_t *SelectionOffsets,
+                                       const size_t *SelectionCounts,
+                                       const char *InData, char *OutData);
+
+    enum RequestTypeEnum
+    {
+        Global = 0,
+        Local = 1
+    };
+
+    /* One queued array Get: which variable, which selection, and where the
+     * result goes. */
+    struct BP5ArrayRequest
+    {
+        BP5VarRec *VarRec = NULL;
+        enum RequestTypeEnum RequestType = Global;
+        size_t BlockID = 0;
+        Dims Start;
+        Dims Count;
+        void *Data = NULL;
+    };
+    std::vector<BP5ArrayRequest> PendingRequests;
+    bool NeedWriter(BP5ArrayRequest Req, int i);
+    size_t CurTimestep = 0;
+    std::vector<ControlInfo *> ActiveControl;
+};
+
+#define declare_template_instantiation(T)                                      \
+    extern template std::vector<typename core::Variable<T>::BPInfo>            \
+    BP5Deserializer::BlocksInfo(const core::Variable<T> &, const size_t)       \
+        const;
+
+ADIOS2_FOREACH_STDTYPE_1ARG(declare_template_instantiation)
+#undef declare_template_instantiation
+
+} // end namespace format
+} // end namespace adios2
+
+#endif /* ADIOS2_TOOLKIT_FORMAT_BP5_BP5DESERIALIZER_H_ */
diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.tcc b/source/adios2/toolkit/format/bp5/BP5Deserializer.tcc
new file mode 100644
index 0000000000..d52e6f0d9f
--- /dev/null
+++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.tcc
@@ -0,0 +1,89 @@
+/*
+ * Distributed under the OSI-approved Apache 
License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * BP5Deserializer.tcc
+ *
+ */
+
+#ifndef ADIOS2_TOOLKIT_FORMAT_BP5_BP5DESERIALIZER_TCC_
+#define ADIOS2_TOOLKIT_FORMAT_BP5_BP5DESERIALIZER_TCC_
+
+#include "BP5Deserializer.h"
+
+#include <algorithm> //std::reverse
+// NOTE(review): the header name on the next line was lost from the recovered
+// text; <vector> is what BlocksInfo below requires - confirm against upstream.
+#include <vector>
+
+#include "adios2/helper/adiosFunctions.h"
+
+namespace adios2
+{
+namespace format
+{
+
+/*
+ * Describe every block of `variable` written in the installed metadata.
+ *
+ * For each writer rank the variable's MetaArrayRec is located through the
+ * writer's active control block, and one BPInfo (Shape/Start/Count) is
+ * appended per block that writer contributed.  Shape and Start are left
+ * empty when the metadata carries no Shape.
+ *
+ * Returns an empty vector when the variable is unknown.  `step` is not used.
+ *
+ * Count and Offsets are laid out as Dims entries per block (this is how
+ * FinalizeGets walks the counts), so both are advanced per block; the
+ * previous code advanced only Offsets and reported the first block's Count
+ * for every block.
+ */
+template <class T>
+std::vector<typename core::Variable<T>::BPInfo>
+BP5Deserializer::BlocksInfo(const core::Variable<T> &variable,
+                            const size_t step) const
+{
+    std::vector<typename core::Variable<T>::BPInfo> Ret;
+
+    const auto Found = VarByKey.find((void *)&variable);
+    if ((Found == VarByKey.end()) || !Found->second)
+    {
+        return Ret;
+    }
+    const BP5VarRec *VarRec = Found->second;
+
+    for (int WriterRank = 0; WriterRank < m_WriterCohortSize; WriterRank++)
+    {
+        const void *BaseData = MetadataBaseAddrs[WriterRank];
+        const ControlInfo *Control = ActiveControl[WriterRank];
+        if (!BaseData || !Control)
+        {
+            // no metadata installed for this writer
+            continue;
+        }
+
+        // assumes ControlCount is the number of valid Controls entries -
+        // TODO confirm against BuildControl()
+        int ControlIndex = 0;
+        while ((ControlIndex < Control->ControlCount) &&
+               (Control->Controls[ControlIndex].VarRec != VarRec))
+        {
+            ControlIndex++;
+        }
+        if (ControlIndex == Control->ControlCount)
+        {
+            // this writer's format does not contain the variable
+            continue;
+        }
+
+        const int FieldOffset = Control->Controls[ControlIndex].FieldOffset;
+        const MetaArrayRec *meta_base =
+            (const MetaArrayRec *)((const char *)BaseData + FieldOffset);
+        const size_t DimCount = meta_base->Dims;
+        const size_t BlockCount = VarRec->PerWriterBlockCount[WriterRank];
+        for (size_t Block = 0; Block < BlockCount; Block++)
+        {
+            const size_t *Shape = meta_base->Shape;
+            const size_t *Start = NULL;
+            const size_t *Count = NULL;
+            if (meta_base->Offsets)
+                Start = meta_base->Offsets + (Block * DimCount);
+            if (meta_base->Count)
+                Count = meta_base->Count + (Block * DimCount);
+
+            typename core::Variable<T>::BPInfo Tmp;
+            if (Shape)
+            {
+                Tmp.Shape.assign(Shape, Shape + DimCount);
+                if (Start)
+                {
+                    Tmp.Start.assign(Start, Start + DimCount);
+                }
+            }
+            if (Count)
+            {
+                Tmp.Count.assign(Count, Count + DimCount);
+            }
+            Ret.push_back(Tmp);
+        }
+    }
+    return Ret;
+}
+
+} // end namespace format
+} // end namespace adios2
+
+#endif /* 
ADIOS2_TOOLKIT_FORMAT_BP5_BP5DESERIALIZER_TCC_ */
diff --git a/source/adios2/toolkit/format/bp5/BP5Serializer.cpp b/source/adios2/toolkit/format/bp5/BP5Serializer.cpp
new file mode 100644
index 0000000000..fab720c2b8
--- /dev/null
+++ b/source/adios2/toolkit/format/bp5/BP5Serializer.cpp
@@ -0,0 +1,819 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0.  See
+ * accompanying file Copyright.txt for details.
+ *
+ * BP5Serializer.cpp
+ *
+ */
+
+#include "adios2/core/Attribute.h"
+#include "adios2/core/IO.h"
+#include "adios2/helper/adiosMemory.h"
+#include "adios2/toolkit/format/buffer/ffs/BufferFFS.h"
+
+// NOTE(review): a single system header was named here but its name was lost
+// from the recovered text; these three cover the snprintf/sscanf,
+// malloc/realloc/free and mem*/str* calls made in this file.
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include "BP5Serializer.h"
+
+#ifdef _WIN32
+#pragma warning(disable : 4250)
+#endif
+
+namespace adios2
+{
+namespace format
+{
+
+BP5Serializer::BP5Serializer() { Init(); }
+
+/*
+ * Release the writer record list, the metadata field list, the local FM
+ * context and the metadata buffer together with its bit field.
+ * NOTE(review): Info.DataFields and the attribute field list/data are built
+ * with the same allocators but are not released here - confirm whether they
+ * are owned elsewhere.
+ */
+BP5Serializer::~BP5Serializer()
+{
+    if (Info.RecList)
+        free(Info.RecList);
+    if (Info.MetaFieldCount)
+        free_FMfield_list(Info.MetaFields);
+    if (Info.LocalFMContext)
+        free_FMcontext(Info.LocalFMContext);
+    if (MetadataBuf)
+    {
+        if (((FFSMetadataInfoStruct *)MetadataBuf)->BitField)
+            free(((FFSMetadataInfoStruct *)MetadataBuf)->BitField);
+        free(MetadataBuf);
+    }
+}
+
+/*
+ * Reset Info and register the three fixed leading metadata fields
+ * (BitFieldCount, BitField, DataBlockSize), then size the metadata buffer
+ * and give it an initial one-word bit field.
+ */
+void BP5Serializer::Init()
+{
+    memset(&Info, 0, sizeof(Info));
+    Info.RecCount = 0;
+    Info.RecList = (BP5Serializer::BP5WriterRec)malloc(sizeof(Info.RecList[0]));
+    Info.MetaFieldCount = 0;
+    Info.MetaFields = NULL;
+    Info.DataFieldCount = 0;
+    Info.DataFields = NULL;
+    Info.LocalFMContext = create_local_FMcontext();
+    AddSimpleField(&Info.MetaFields, &Info.MetaFieldCount, "BitFieldCount",
+                   "integer", sizeof(size_t));
+    AddSimpleField(&Info.MetaFields, &Info.MetaFieldCount, "BitField",
+                   "integer[BitFieldCount]", sizeof(size_t));
+    AddSimpleField(&Info.MetaFields, &Info.MetaFieldCount, "DataBlockSize",
+                   "integer", sizeof(size_t));
+    RecalcMarshalStorageSize();
+
+    FFSMetadataInfoStruct *MBase = (FFSMetadataInfoStruct *)MetadataBuf;
+    MBase->BitFieldCount = 0;
+    MBase->BitField = (std::size_t *)malloc(sizeof(size_t));
+    MBase->DataBlockSize = 0;
+}
+
+/* Linear search of Info.RecList for the record whose Key matches; NULL if
+ * the variable has not been marshalled before. */
+BP5Serializer::BP5WriterRec BP5Serializer::LookupWriterRec(void *Key)
+{
+    for (int i = 0; i < Info.RecCount; i++)
+    {
+        if (Info.RecList[i].Key == Key)
+        {
+            return &Info.RecList[i];
+        }
+    }
+
+    return NULL;
+}
+
+/*
+ * Grow MetadataBuf so it covers the last registered metadata field (rounded
+ * up to a multiple of 8, plus 8 spare bytes) and zero the newly added part.
+ */
+void BP5Serializer::RecalcMarshalStorageSize()
+{
+    if (Info.MetaFieldCount)
+    {
+        FMFieldList LastMetaField;
+        size_t NewMetaSize;
+        LastMetaField = &Info.MetaFields[Info.MetaFieldCount - 1];
+        NewMetaSize =
+            (LastMetaField->field_offset + LastMetaField->field_size + 7) & ~7;
+        MetadataBuf = realloc(MetadataBuf, NewMetaSize + 8);
+        // only clear when the buffer actually grew; the unsigned difference
+        // would otherwise wrap into a huge memset length
+        if (NewMetaSize > MetadataSize)
+        {
+            memset((char *)(MetadataBuf) + MetadataSize, 0,
+                   NewMetaSize - MetadataSize);
+        }
+        MetadataSize = NewMetaSize;
+    }
+}
+
+/* Same as RecalcMarshalStorageSize, for the attribute field list and
+ * Info.AttributeData. */
+void BP5Serializer::RecalcAttributeStorageSize()
+{
+    if (Info.AttributeFieldCount)
+    {
+        FMFieldList LastAttributeField;
+        size_t NewAttributeSize;
+        LastAttributeField =
+            &Info.AttributeFields[Info.AttributeFieldCount - 1];
+        NewAttributeSize = (LastAttributeField->field_offset +
+                            LastAttributeField->field_size + 7) &
+                           ~7;
+        Info.AttributeData = realloc(Info.AttributeData, NewAttributeSize + 8);
+        if (NewAttributeSize > (size_t)Info.AttributeSize)
+        {
+            memset((char *)(Info.AttributeData) + Info.AttributeSize, 0,
+                   NewAttributeSize - Info.AttributeSize);
+        }
+        Info.AttributeSize = NewAttributeSize;
+    }
+}
+
+/*
+ * Append a field {Name, Type, ElementSize} to the NULL-terminated FM field
+ * list *FieldP and bump *CountP.
+ *
+ * The field is placed after the previous one, aligned up to a multiple of
+ * ElementSize.  A previous field whose type contains '[' is stored as a
+ * pointer, so sizeof(void *) is used as its size.  Name and Type are
+ * duplicated; the list is regrown to hold the new entry plus terminator.
+ */
+void BP5Serializer::AddSimpleField(FMFieldList *FieldP, int *CountP,
+                                   const char *Name, const char *Type,
+                                   int ElementSize)
+{
+    int Offset = 0;
+    if (*CountP)
+    {
+        FMFieldList PriorField = &((*FieldP)[(*CountP) - 1]);
+        int PriorFieldSize = PriorField->field_size;
+        if (strchr(PriorField->field_type, '['))
+        {
+            // really a pointer
+            PriorFieldSize = sizeof(void *);
+        }
+        Offset =
+            ((PriorField->field_offset + PriorFieldSize + ElementSize - 1) /
+             ElementSize) *
+            ElementSize;
+    }
+    // realloc(NULL, n) behaves as malloc(n), so one call serves both the
+    // first and every later field
+    *FieldP =
+        (FMFieldList)realloc(*FieldP, (*CountP + 2) * sizeof((*FieldP)[0]));
+
+    FMFieldList Field = &((*FieldP)[*CountP]);
+    (*CountP)++;
+    Field->field_name = strdup(Name);
+    Field->field_type = strdup(Type);
+    Field->field_size = ElementSize;
+    Field->field_offset = Offset;
+    // terminator entry
+    Field++;
+    Field->field_name = NULL;
+    Field->field_type = NULL;
+    Field->field_size = 0;
+    Field->field_offset = 0;
+}
+
+/* In-memory layouts and FM field descriptions of the two complex types. */
+typedef struct dcomplex
+{
+    double real_part;
+    double imag_part;
+} dcomplex_struct;
+
+typedef struct fcomplex
+{
+    float real_part;
+    float imag_part;
+} fcomplex_struct;
+
+FMField fcomplex_field_list[] = {
+    {"real", "float", sizeof(float), FMOffset(fcomplex_struct *, real_part)},
+    {"imag", "float", sizeof(float), FMOffset(fcomplex_struct *, imag_part)},
+    {NULL, NULL, 0, 0}};
+
+FMField dcomplex_field_list[] = {
+    {"real", "float", sizeof(double), FMOffset(dcomplex_struct *, real_part)},
+    {"imag", "float", sizeof(double), FMOffset(dcomplex_struct *, imag_part)},
+    {NULL, NULL, 0, 0}};
+
+/* Returns a malloc'd "SST_" + base_name + postfix; the caller frees it. */
+char *BP5Serializer::ConcatName(const char *base_name, const char *postfix)
+{
+    char *Ret = (char *)malloc(strlen("SST_") + strlen(base_name) +
+                               strlen(postfix) + 1);
+    strcpy(Ret, "SST_");
+    strcat(Ret, base_name);
+    strcat(Ret, postfix);
+    return Ret;
+}
+
+/*
+ * Returns a malloc'd "SST<element_size>_<type>_<base_name>"; the caller
+ * frees it.  The buffer is sized from the formatted prefix length instead of
+ * a fixed allowance, so it cannot be overrun whatever the integer values.
+ */
+char *BP5Serializer::BuildVarName(const char *base_name, const int type,
+                                  const int element_size)
+{
+    const int PrefixLen = snprintf(NULL, 0, "SST%d_%d_", element_size, type);
+    char *Ret = (char *)malloc(PrefixLen + strlen(base_name) + 1);
+    snprintf(Ret, PrefixLen + 1, "SST%d_%d_", element_size, type);
+    strcpy(Ret + PrefixLen, base_name);
+    return Ret;
+}
+
+/*
+ * Inverse of BuildVarName: parse "SST<element_size>_<type>_<base>".
+ *
+ * *base_name_p receives a strdup'd copy of the text after the second '_'
+ * (the caller frees it).  If Name does not have that shape the element size
+ * and type are reported as 0 and the whole Name is returned as the base,
+ * rather than reading through a null pointer or returning uninitialized
+ * values.
+ */
+void BP5Serializer::BreakdownVarName(const char *Name, char **base_name_p,
+                                     int *type_p, int *element_size_p)
+{
+    int Type = 0;
+    int ElementSize = 0;
+    sscanf(Name, "SST%d_%d_", &ElementSize, &Type);
+
+    const char *NameStart = Name;
+    const char *FirstSep = strchr(Name, '_');
+    const char *SecondSep = FirstSep ? strchr(FirstSep + 1, '_') : NULL;
+    if (SecondSep)
+    {
+        NameStart = SecondSep + 1;
+    }
+    *element_size_p = ElementSize;
+    *type_p = Type;
+    *base_name_p = strdup(NameStart);
+}
+
+char *BP5Serializer::BuildArrayDimsName(const char *base_name, const int type,
+                                        const int element_size)
+{
+    int Len = strlen(base_name) + 3 + 
strlen("SST_") + 16;
+    char *Ret = (char *)malloc(Len);
+    sprintf(Ret, "SST%d_%d_", element_size, type);
+    strcat(Ret, base_name);
+    strcat(Ret, "Dims");
+    return Ret;
+}
+
+/* Returns a malloc'd "SST<element_size>_<type>_<base_name>DBCount"; the
+ * caller frees it. */
+char *BP5Serializer::BuildArrayDBCountName(const char *base_name,
+                                           const int type,
+                                           const int element_size)
+{
+    int Len = strlen(base_name) + 3 + strlen("SST_") + 16;
+    char *Ret = (char *)malloc(Len);
+    sprintf(Ret, "SST%d_%d_", element_size, type);
+    strcat(Ret, base_name);
+    strcat(Ret, "DBCount");
+    return Ret;
+}
+
+/* Returns a malloc'd "SST<element_size>_<type>_<base_name>BlockCount"; the
+ * caller frees it. */
+char *BP5Serializer::BuildArrayBlockCountName(const char *base_name,
+                                              const int type,
+                                              const int element_size)
+{
+    int Len = strlen(base_name) + 3 + strlen("SST_") + 24;
+    char *Ret = (char *)malloc(Len);
+    sprintf(Ret, "SST%d_%d_", element_size, type);
+    strcat(Ret, base_name);
+    strcat(Ret, "BlockCount");
+    return Ret;
+}
+
+/*
+ * Map an ADIOS2 DataType to the FFS base type name used in field lists.
+ * Returns a strdup'd string the caller must free, or NULL for None and
+ * Compound (and for any value not listed in the switch).
+ */
+char *BP5Serializer::TranslateADIOS2Type2FFS(const DataType Type)
+{
+    switch (Type)
+    {
+    case DataType::None:
+    case DataType::Compound:
+        return NULL;
+    case DataType::Int8:
+    case DataType::Int16:
+    case DataType::Int32:
+    case DataType::Int64:
+    case DataType::Char:
+        return strdup("integer");
+    case DataType::UInt8:
+    case DataType::UInt16:
+    case DataType::UInt32:
+    case DataType::UInt64:
+        return strdup("unsigned integer");
+    case DataType::Float:
+    case DataType::Double:
+    case DataType::LongDouble:
+        return strdup("float");
+    case DataType::FloatComplex:
+        return strdup("complex4");
+    case DataType::DoubleComplex:
+        return strdup("complex8");
+    case DataType::String:
+        return strdup("string");
+    }
+    return 0;
+}
+
+/* Append a scalar field of ADIOS2 type Type to the field list.  The
+ * translated type name is passed on as returned, including NULL for types
+ * TranslateADIOS2Type2FFS does not map. */
+void BP5Serializer::AddField(FMFieldList *FieldP, int *CountP, const char *Name,
+                             const DataType Type, int ElementSize)
+{
+    char *TransType = TranslateADIOS2Type2FFS(Type);
+    AddSimpleField(FieldP, CountP, Name, TransType, ElementSize);
+    free(TransType);
+}
+
+/*
+ * Append a pointer-to-fixed-size-array field, typed "*(<type>[DimCount])".
+ * The field is laid out with pointer size/alignment, after which its
+ * field_size is overwritten with the element size.
+ */
+void BP5Serializer::AddFixedArrayField(FMFieldList *FieldP, int *CountP,
+                                       const char *Name, const DataType Type,
+                                       int ElementSize, int DimCount)
+{
+    const char *TransType = TranslateADIOS2Type2FFS(Type);
+    char *TypeWithArray = (char *)malloc(strlen(TransType) + 16);
+    sprintf(TypeWithArray, "*(%s[%d])", TransType, DimCount);
+    free((void *)TransType);
+    AddSimpleField(FieldP, CountP, Name, TypeWithArray, sizeof(void *));
+    free(TypeWithArray);
+    (*FieldP)[*CountP - 1].field_size = ElementSize;
+}
+
+/*
+ * Append a variable-length array field, typed "<type>[<SizeField>]", whose
+ * length is taken from the field named SizeField.  As above, the field is
+ * laid out as a pointer and field_size is then set to the element size.
+ */
+void BP5Serializer::AddVarArrayField(FMFieldList *FieldP, int *CountP,
+                                     const char *Name, const DataType Type,
+                                     int ElementSize, char *SizeField)
+{
+    char *TransType = TranslateADIOS2Type2FFS(Type);
+    char *TypeWithArray =
+        (char *)malloc(strlen(TransType) + strlen(SizeField) + 8);
+    sprintf(TypeWithArray, "%s[%s]", TransType, SizeField);
+    free(TransType);
+    AddSimpleField(FieldP, CountP, Name, TypeWithArray, sizeof(void *));
+    free(TypeWithArray);
+    (*FieldP)[*CountP - 1].field_size = ElementSize;
+}
+
+/*
+ * Create the writer record for a variable seen for the first time and
+ * register its fields.
+ *
+ * Grows Info.RecList by one entry keyed by Variable.  A scalar
+ * (DimCount == 0) adds one metadata field holding the value; an array adds
+ * seven metadata fields (Dims, BlockCount, DBCount, Shape, Count, Offsets,
+ * DataLocations) and two data fields (ElemCount and the array itself).
+ * The order of the AddField calls fixes the field offsets, so it must not be
+ * changed.  The cached formats are reset to NULL because the field lists
+ * changed.  Returns the new record, which points into Info.RecList.
+ */
+BP5Serializer::BP5WriterRec
+BP5Serializer::CreateWriterRec(void *Variable, const char *Name, DataType Type,
+                               size_t ElemSize, size_t DimCount)
+{
+    Info.RecList = (BP5WriterRec)realloc(
+        Info.RecList, (Info.RecCount + 1) * sizeof(Info.RecList[0]));
+    BP5WriterRec Rec = &Info.RecList[Info.RecCount];
+    // strings are stored in the metadata as a char pointer
+    if (Type == DataType::String)
+        ElemSize = sizeof(char *);
+    Rec->Key = Variable;
+    Rec->FieldID = Info.RecCount;
+    Rec->DimCount = DimCount;
+    Rec->Type = (int)Type;
+    if (DimCount == 0)
+    {
+        // simple field, only add base value FMField to metadata
+        char *SstName = ConcatName(Name, "");
+        AddField(&Info.MetaFields, &Info.MetaFieldCount, SstName, Type,
+                 ElemSize);
+        free(SstName);
+        RecalcMarshalStorageSize();
+        Rec->MetaOffset = Info.MetaFields[Info.MetaFieldCount - 1].field_offset;
+        Rec->DataOffset = (size_t)-1;
+        // Changing the formats renders these invalid
+        Info.MetaFormat = NULL;
+    }
+    else
+    {
+        // Array field.  To Metadata, add FMFields for DimCount, Shape, Count
+        // and Offsets matching _MetaArrayRec
+        char *ArrayName = BuildArrayDimsName(Name, (int)Type, ElemSize);
+        char *ArrayBlockCount =
+            BuildArrayBlockCountName(Name, (int)Type, ElemSize);
+        char *ArrayDBCount = BuildArrayDBCountName(Name, (int)Type, ElemSize);
+        AddField(&Info.MetaFields, &Info.MetaFieldCount, ArrayName,
+                 DataType::Int64, sizeof(size_t));
+        free(ArrayName);
+        // the record's metadata offset is that of its first (Dims) field
+        Rec->MetaOffset = Info.MetaFields[Info.MetaFieldCount - 1].field_offset;
+        char *ShapeName = ConcatName(Name, "Shape");
+        char *CountName = ConcatName(Name, "Count");
+        char *OffsetsName = ConcatName(Name, "Offsets");
+        char *LocationsName = ConcatName(Name, "DataLocations");
+        AddField(&Info.MetaFields, &Info.MetaFieldCount, ArrayBlockCount,
+                 DataType::Int64, sizeof(size_t));
+        AddField(&Info.MetaFields, &Info.MetaFieldCount, ArrayDBCount,
+                 DataType::Int64, sizeof(size_t));
+        AddFixedArrayField(&Info.MetaFields, &Info.MetaFieldCount, ShapeName,
+                           DataType::Int64, sizeof(size_t), DimCount);
+        // Count and Offsets are sized by the DBCount field, DataLocations by
+        // the BlockCount field
+        AddVarArrayField(&Info.MetaFields, &Info.MetaFieldCount, CountName,
+                         DataType::Int64, sizeof(size_t), ArrayDBCount);
+        AddVarArrayField(&Info.MetaFields, &Info.MetaFieldCount, OffsetsName,
+                         DataType::Int64, sizeof(size_t), ArrayDBCount);
+        AddVarArrayField(&Info.MetaFields, &Info.MetaFieldCount, LocationsName,
+                         DataType::Int64, sizeof(size_t), ArrayBlockCount);
+        free(ArrayDBCount);
+        free(ArrayBlockCount);
+        free(ShapeName);
+        free(CountName);
+        free(OffsetsName);
+        free(LocationsName);
+        RecalcMarshalStorageSize();
+
+#ifdef NDEF
+        if ((ConfigParams->CompressionMethod == SstCompressZFP) &&
+            ZFPcompressionPossible(Type, DimCount))
+        {
+            Type = Int8;
+            ElemSize = 1;
+        }
+#endif
+        // To Data, add FMFields for ElemCount and Array matching _ArrayRec
+        char *ElemCountName = ConcatName(Name, "ElemCount");
+        AddField(&Info.DataFields, &Info.DataFieldCount, ElemCountName,
+                 DataType::Int64, sizeof(size_t));
+        Rec->DataOffset =
+            Info.DataFields[Info.DataFieldCount - 1].field_offset;
+        char *SstName = ConcatName(Name, "");
+        AddVarArrayField(&Info.DataFields, &Info.DataFieldCount, SstName, Type,
+                         ElemSize, ElemCountName);
+        free(SstName);
+        free(ElemCountName);
+        RecalcMarshalStorageSize();
+        // Changing the formats renders these invalid
+        Info.MetaFormat = NULL;
+        Info.DataFormat = NULL;
+    }
+    Info.RecCount++;
+    return Rec;
+}
+
+/* Returns a malloc'd copy of the Count values in Vals; the caller frees
+ * it. */
+size_t *BP5Serializer::CopyDims(const size_t Count, const size_t *Vals)
+{
+    size_t *Ret = (size_t *)malloc(Count * sizeof(Ret[0]));
+    memcpy(Ret, Vals, Count * sizeof(Ret[0]));
+    return Ret;
+}
+
+/* Grow OldDims (holding OldCount values) with realloc and append the Count
+ * values in Vals; returns the possibly moved array. */
+size_t *BP5Serializer::AppendDims(size_t *OldDims, const size_t OldCount,
+                                  const size_t Count, const size_t *Vals)
+{
+    size_t *Ret =
+        (size_t *)realloc(OldDims, (OldCount + Count) * sizeof(Ret[0]));
+    memcpy(Ret + OldCount, Vals, Count * sizeof(Ret[0]));
+    return Ret;
+}
+
+/* Product of the Count values in Vals, i.e. the element count of a block
+ * with those extents (1 when Count is 0). */
+size_t BP5Serializer::CalcSize(const size_t Count, const size_t *Vals)
+{
+    size_t i;
+    size_t Elems = 1;
+    for (i = 0; i < Count; i++)
+    {
+        Elems *= Vals[i];
+    }
+    return Elems;
+}
+
+void BP5Serializer::Marshal(void *Variable, const char *Name,
+                            const DataType Type, size_t ElemSize,
+                            size_t DimCount, const size_t *Shape,
+                            const size_t *Count, const size_t *Offsets,
+                            const void *Data, bool Sync)
+{
+
+    FFSMetadataInfoStruct *MBase;
+
+    BP5WriterRec Rec = LookupWriterRec(Variable);
+
+    if (!Rec)
+    {
+        Rec = CreateWriterRec(Variable, Name, Type, ElemSize, DimCount);
+    }
+
+    MBase = (struct FFSMetadataInfoStruct *)MetadataBuf;
+    int AlreadyWritten = FFSBitfieldTest(MBase, Rec->FieldID);
+    FFSBitfieldSet(MBase, Rec->FieldID);
+
+    if (Rec->DimCount == 0)
+    {
+        if (Type != DataType::String)
+            memcpy((char *)(MetadataBuf) + Rec->MetaOffset, Data, ElemSize);
+        else
+        {
+            char **StrPtr = (char **)((char *)(MetadataBuf) + Rec->MetaOffset);
+            if (AlreadyWritten)
+                free(*StrPtr);
+            *StrPtr = strdup(*(char **)Data);
+        }
+    }
+    else
+    {
+        MetaArrayRec *MetaEntry =
+            (MetaArrayRec *)((char *)(MetadataBuf) 
+ Rec->MetaOffset); + size_t ElemCount = CalcSize(DimCount, Count); + size_t DataOffset; + + /* handle metadata */ + MetaEntry->Dims = DimCount; + if (CurDataBuffer == NULL) + { + CurDataBuffer = new BufferV("data buffer"); + } + DataOffset = + CurDataBuffer->AddToVec(ElemCount * ElemSize, Data, ElemSize, Sync); + + if (!AlreadyWritten) + { + if (Shape) + MetaEntry->Shape = CopyDims(DimCount, Shape); + else + MetaEntry->Shape = NULL; + MetaEntry->DBCount = DimCount; + MetaEntry->Count = CopyDims(DimCount, Count); + MetaEntry->BlockCount = 1; + MetaEntry->DataLocation = (size_t *)malloc(sizeof(size_t)); + MetaEntry->DataLocation[0] = DataOffset; + if (Offsets) + MetaEntry->Offsets = CopyDims(DimCount, Offsets); + else + MetaEntry->Offsets = NULL; + } + else + { + /* already got some metadata, add blocks */ + size_t PreviousDBCount = MetaEntry->DBCount; + // Assume shape is still valid (modify this if shape /global + // dimensions can change ) + // Also assume Dims is always right and consistent, otherwise, + // bad things + MetaEntry->DBCount += DimCount; + MetaEntry->BlockCount++; + MetaEntry->Count = + AppendDims(MetaEntry->Count, PreviousDBCount, DimCount, Count); + MetaEntry->DataLocation = + (size_t *)realloc(MetaEntry->DataLocation, + MetaEntry->BlockCount * sizeof(size_t)); + MetaEntry->DataLocation[MetaEntry->BlockCount - 1] = DataOffset; + if (Offsets) + MetaEntry->Offsets = AppendDims( + MetaEntry->Offsets, PreviousDBCount, DimCount, Offsets); + } + + // if ((Stream->ConfigParams->CompressionMethod == + // SstCompressZFP) && + // ZFPcompressionPossible(Type, DimCount)) + // { +#ifdef ADIOS2_HAVE_ZFP + // /* this should never be true if ZFP is not available + // */ size_t ByteCount; char *Output = + // FFS_ZFPCompress(Stream, Rec->DimCount, Rec->Type, + // (void *)Data, Count, + // &ByteCount); + // DataEntry->ElemCount = ByteCount; + // DataEntry->Array = Output; +#endif + // } + // else + } +} + +void BP5Serializer::MarshalAttribute(const char *Name, 
const DataType Type, + size_t ElemSize, size_t ElemCount, + const void *Data) +{ + + const char *AttrString = NULL; + const void *DataAddress = Data; + + NewAttribute = true; + if (Type == DataType::String) + { + ElemSize = sizeof(char *); + AttrString = (char *)Data; + DataAddress = (const char *)&AttrString; + } + if (ElemCount == (size_t)(-1)) + { + // simple field, only simple attribute name and value + char *SstName = BuildVarName(Name, (int)Type, ElemSize); + AddField(&Info.AttributeFields, &Info.AttributeFieldCount, SstName, + Type, ElemSize); + free(SstName); + RecalcAttributeStorageSize(); + int DataOffset = + Info.AttributeFields[Info.AttributeFieldCount - 1].field_offset; + memcpy((char *)(Info.AttributeData) + DataOffset, DataAddress, + ElemSize); + } + else + { + /* // Array field. To Metadata, add FMFields for DimCount, Shape, Count + */ + /* // and Offsets matching _MetaArrayRec */ + /* char *ArrayName = BuildStaticArrayName(Name, Type, ElemCount); */ + /* AddField(&Info->AttributeFields, &Info->AttributeFieldCount, + * ArrayName, Type, */ + /* sizeof(size_t)); */ + /* free(ArrayName); */ + /* Rec->MetaOffset = */ + /* Info->MetaFields[Info->MetaFieldCount - 1].field_offset; */ + /* char *ShapeName = ConcatName(Name, "Shape"); */ + /* char *CountName = ConcatName(Name, "Count"); */ + /* char *OffsetsName = ConcatName(Name, "Offsets"); */ + /* AddFixedArrayField(&Info->MetaFields, &Info->MetaFieldCount, + * ShapeName, */ + /* "integer", sizeof(size_t), DimCount); */ + /* AddFixedArrayField(&Info->MetaFields, &Info->MetaFieldCount, + * CountName, */ + /* "integer", sizeof(size_t), DimCount); */ + /* AddFixedArrayField(&Info->MetaFields, &Info->MetaFieldCount, */ + /* OffsetsName, "integer", sizeof(size_t), DimCount); + */ + /* free(ShapeName); */ + /* free(CountName); */ + /* free(OffsetsName); */ + /* RecalcMarshalStorageSize(Stream); */ + + /* if ((Stream->ConfigParams->CompressionMethod == SstCompressZFP) && */ + /* ZFPcompressionPossible(Type, 
DimCount)) */ + /* { */ + /* Type = "char"; */ + /* ElemSize = 1; */ + /* } */ + /* // To Data, add FMFields for ElemCount and Array matching _ArrayRec + */ + /* char *ElemCountName = ConcatName(Name, "ElemCount"); */ + /* AddField(&Info->DataFields, &Info->DataFieldCount, ElemCountName, */ + /* "integer", sizeof(size_t)); */ + /* Rec->DataOffset = */ + /* Info->DataFields[Info->DataFieldCount - 1].field_offset; */ + /* char *SstName = ConcatName(Name, ""); */ + /* AddVarArrayField(&Info->DataFields, &Info->DataFieldCount, SstName, + */ + /* Type, ElemSize, ElemCountName); */ + /* free(SstName); */ + /* free(ElemCountName); */ + /* RecalcMarshalStorageSize(Stream); */ + /* // Changing the formats renders these invalid */ + /* Info->MetaFormat = NULL; */ + /* Info->DataFormat = NULL; */ + } +} + +BP5Serializer::TimestepInfo BP5Serializer::CloseTimestep(int timestep) +{ + std::vector Formats; + if (!Info.MetaFormat && Info.MetaFieldCount) + { + MetaMetaInfoBlock Block; + FMStructDescRec struct_list[4] = { + {NULL, NULL, 0, NULL}, + {"complex4", fcomplex_field_list, sizeof(fcomplex_struct), NULL}, + {"complex8", dcomplex_field_list, sizeof(dcomplex_struct), NULL}, + {NULL, NULL, 0, NULL}}; + struct_list[0].format_name = "MetaData"; + struct_list[0].field_list = Info.MetaFields; + struct_list[0].struct_size = + FMstruct_size_field_list(Info.MetaFields, sizeof(char *)); + + FMFormat Format = + register_data_format(Info.LocalFMContext, &struct_list[0]); + Info.MetaFormat = Format; + int size; + Block.MetaMetaInfo = get_server_rep_FMformat(Format, &size); + Block.MetaMetaInfoLen = size; + Block.MetaMetaID = get_server_ID_FMformat(Format, &size); + Block.MetaMetaIDLen = size; + Formats.push_back(Block); + } + if (NewAttribute && Info.AttributeFields) + { + MetaMetaInfoBlock Block; + FMFormat Format = FMregister_simple_format( + Info.LocalFMContext, strdup("Attributes"), Info.AttributeFields, + FMstruct_size_field_list(Info.AttributeFields, sizeof(char *))); + 
Info.AttributeFormat = Format;
+        int size;
+        Block.MetaMetaInfo = get_server_rep_FMformat(Format, &size);
+        Block.MetaMetaInfoLen = size;
+        Block.MetaMetaID = get_server_ID_FMformat(Format, &size);
+        Block.MetaMetaIDLen = size;
+        Formats.push_back(Block);
+    }
+    // Encode Metadata and Data to create contiguous data blocks
+    FFSBuffer MetaEncodeBuffer = create_FFSBuffer();
+    FFSBuffer AttributeEncodeBuffer = NULL;
+    int MetaDataSize = 0;
+    int AttributeSize = 0;
+    struct FFSMetadataInfoStruct *MBase =
+        (struct FFSMetadataInfoStruct *)MetadataBuf;
+
+    if (CurDataBuffer == NULL)
+    {
+        CurDataBuffer = new BufferV("data buffer");
+    }
+    MBase->DataBlockSize = CurDataBuffer->AddToVec(
+        0, NULL, 8, true); // output block size multiple of 8, offset is size
+
+    void *MetaDataBlock = FFSencode(MetaEncodeBuffer, Info.MetaFormat,
+                                    MetadataBuf, &MetaDataSize);
+    BufferFFS *Metadata =
+        new BufferFFS(MetaEncodeBuffer, MetaDataBlock, MetaDataSize);
+
+    BufferFFS *AttrData = NULL;
+    if (NewAttribute && Info.AttributeFields)
+    {
+        AttributeEncodeBuffer = create_FFSBuffer();
+        void *AttributeBlock =
+            FFSencode(AttributeEncodeBuffer, Info.AttributeFormat,
+                      Info.AttributeData, &AttributeSize);
+        AttrData =
+            new BufferFFS(AttributeEncodeBuffer, AttributeBlock, AttributeSize);
+        // FMdump_encoded_data(Info.AttributeFormat, AttributeBlock,
+        // 1024000);
+    }
+
+    // FMdump_encoded_data(Info.MetaFormat, MetaDataBlock, 1024000);
+    /* free all those copied dimensions, etc; they hang off MetadataBuf */
+    MBase = (struct FFSMetadataInfoStruct *)MetadataBuf; // not the BufferFFS
+    size_t *tmp = MBase->BitField;
+    /*
+     * BitField value is saved away from FMfree_var_rec_elements() so that it
+     * isn't unnecessarily free'd.
+ */ + MBase->BitField = NULL; + if (Info.MetaFormat) + FMfree_var_rec_elements(Info.MetaFormat, MetadataBuf); + if (MetadataBuf && MetadataSize) + memset(MetadataBuf, 0, MetadataSize); + MBase->BitField = tmp; + NewAttribute = false; + + return {Formats, Metadata, AttrData, CurDataBuffer}; +#ifdef NDEF + SstInternalProvideTimestep(Stream, &MetaDataRec, &DataRec, Timestep, + Formats, FreeTSInfo, TSInfo, &AttributeRec, + FreeAttrInfo, AttributeEncodeBuffer); + if (AttributeEncodeBuffer) + { + free_FFSBuffer(AttributeEncodeBuffer); + } + while (Formats) + { + struct FFSFormatBlock *Tmp = Formats->Next; + free(Formats); + Formats = Tmp; + } + if (Info.AttributeFields) + free_FMfield_list(Info.AttributeFields); + Info.AttributeFields = NULL; + Info.AttributeFieldCount = 0; + if (Info.AttributeData) + free(Info.AttributeData); + Info.AttributeData = NULL; + Info.AttributeSize = 0; +#endif +} + +std::vector BP5Serializer::CopyMetadataToContiguous( + const std::vector NewMetaMetaBlocks, + const format::Buffer *MetaEncodeBuffer, uint64_t DataSize) const +{ + std::vector Ret; + uint64_t RetSize = 0; + size_t Position = 0; + int32_t NMMBCount = NewMetaMetaBlocks.size(); + RetSize += sizeof(NMMBCount); // NMMB count + + for (auto &n : NewMetaMetaBlocks) + { + RetSize += 2 * sizeof(RetSize); // sizes + RetSize += n.MetaMetaInfoLen + n.MetaMetaIDLen; + } + uint64_t AfterNMMBlocks = RetSize; + RetSize += sizeof(int64_t); // MencodeLen + RetSize += MetaEncodeBuffer->m_FixedSize; + uint64_t AfterNMMBlocks2 = RetSize; + RetSize += sizeof(DataSize); + Ret.resize(RetSize); + + helper::CopyToBuffer(Ret, Position, &NMMBCount); + + for (auto &n : NewMetaMetaBlocks) + { + int64_t IDLen = n.MetaMetaIDLen; + int64_t InfoLen = n.MetaMetaInfoLen; + helper::CopyToBuffer(Ret, Position, &IDLen); + helper::CopyToBuffer(Ret, Position, &InfoLen); + helper::CopyToBuffer(Ret, Position, n.MetaMetaID, IDLen); + helper::CopyToBuffer(Ret, Position, n.MetaMetaInfo, InfoLen); + } + + int64_t MEBSize = 
MetaEncodeBuffer->m_FixedSize; + helper::CopyToBuffer(Ret, Position, &MEBSize); + uint64_t MDataPos = Position; + helper::CopyToBuffer(Ret, Position, MetaEncodeBuffer->Data(), + MetaEncodeBuffer->m_FixedSize); + helper::CopyToBuffer(Ret, Position, &DataSize); + return Ret; +} + +std::vector BP5Serializer::BreakoutContiguousMetadata( + std::vector *Aggregate, const std::vector Counts, + std::vector &UniqueMetaMetaBlocks, + std::vector &DataSizes) const +{ + size_t Position = 0; + std::vector MetadataBlocks; + MetadataBlocks.reserve(Counts.size()); + DataSizes.resize(Counts.size()); + for (int Rank = 0; Rank < Counts.size(); Rank++) + { + int32_t NMMBCount; + helper::CopyFromBuffer(*Aggregate, Position, &NMMBCount); + for (int i = 0; i < NMMBCount; i++) + { + uint64_t IDLen; + uint64_t InfoLen; + helper::CopyFromBuffer(*Aggregate, Position, &IDLen); + helper::CopyFromBuffer(*Aggregate, Position, &InfoLen); + uint64_t IDPosition = Position; + uint64_t InfoPosition = Position + IDLen; + Position = InfoPosition + InfoLen; + bool Found = 0; + for (auto &o : UniqueMetaMetaBlocks) + { + if (o.MetaMetaIDLen != IDLen) + continue; + if (std::memcmp(o.MetaMetaID, Aggregate->data() + IDPosition, + IDLen) == 0) + Found = true; + } + if (!Found) + { + MetaMetaInfoBlock New = {Aggregate->data() + InfoPosition, + InfoLen, + Aggregate->data() + IDPosition, IDLen}; + UniqueMetaMetaBlocks.push_back(New); + } + } + uint64_t MEBSize; + helper::CopyFromBuffer(*Aggregate, Position, &MEBSize); + MetadataBlocks.push_back({Aggregate->data() + Position, MEBSize}); + Position += MEBSize; + helper::CopyFromBuffer(*Aggregate, Position, &DataSizes[Rank]); + } + return MetadataBlocks; +} + +} +} diff --git a/source/adios2/toolkit/format/bp5/BP5Serializer.h b/source/adios2/toolkit/format/bp5/BP5Serializer.h new file mode 100644 index 0000000000..e7ef620a81 --- /dev/null +++ b/source/adios2/toolkit/format/bp5/BP5Serializer.h @@ -0,0 +1,157 @@ +/* + * Distributed under the OSI-approved Apache 
License, Version 2.0. See + * accompanying file Copyright.txt for details. + * + * BP5Serializer.h + */ + +#ifndef ADIOS2_TOOLKIT_FORMAT_BP5_BP5SERIALIZER_H_ +#define ADIOS2_TOOLKIT_FORMAT_BP5_BP5SERIALIZER_H_ + +#include + +#include "BP5Base.h" +#include "adios2/core/Attribute.h" +#include "adios2/core/IO.h" +#include "adios2/toolkit/format/buffer/BufferV.h" +#include "adios2/toolkit/format/buffer/heap/BufferSTL.h" +#include "atl.h" +#include "ffs.h" +#include "fm.h" +#ifdef _WIN32 +#pragma warning(disable : 4250) +#endif + +namespace adios2 +{ +namespace format +{ + +class BP5Serializer : virtual public BP5Base +{ + +public: + BP5Serializer(); + ~BP5Serializer(); + + typedef struct _TimestepInfo + { + std::vector NewMetaMetaBlocks; + Buffer *MetaEncodeBuffer; + Buffer *AttributeEncodeBuffer; + BufferV *DataBuffer; + } TimestepInfo; + + typedef struct _MetadataInfo + { + std::vector NewMetaMetaBlocks; + std::vector MetaEncodeBufferSizes; + std::vector MetaEncodeBuffers; + + std::vector AttributeEncodeBufferSizes; + std::vector AttributeEncodeBuffers; + Buffer BackingBuffer; + } AggregatedMetadataInfo; + + void Marshal(void *Variable, const char *Name, const DataType Type, + size_t ElemSize, size_t DimCount, const size_t *Shape, + const size_t *Count, const size_t *Offsets, const void *Data, + bool Sync); + void MarshalAttribute(const char *Name, const DataType Type, + size_t ElemSize, size_t ElemCount, const void *Data); + TimestepInfo CloseTimestep(int timestep); + + core::Engine *m_Engine = NULL; + + std::vector CopyMetadataToContiguous( + const std::vector NewmetaMetaBlocks, + const format::Buffer *MetaEncodeBuffer, uint64_t DataSize) const; + + std::vector BreakoutContiguousMetadata( + std::vector *Aggregate, const std::vector Counts, + std::vector &UniqueMetaMetaBlocks, + std::vector &DataSizes) const; + +private: + void Init(); + typedef struct _BP5WriterRec + { + void *Key; + int FieldID; + size_t DataOffset; + size_t MetaOffset; + int DimCount; + int Type; 
+    } * BP5WriterRec;
+
+    struct FFSWriterMarshalBase
+    {
+        int RecCount = 0;            // number of records in RecList
+        BP5WriterRec RecList = NULL; // grown by one in CreateWriterRec()
+        FMContext LocalFMContext;
+        int MetaFieldCount = 0;
+        FMFieldList MetaFields = NULL;
+        FMFormat MetaFormat = NULL; // NULL until registered in CloseTimestep()
+        int DataFieldCount = 0;
+        FMFieldList DataFields = NULL;
+        FMFormat DataFormat = NULL; // reset by CreateWriterRec() for arrays
+        int AttributeFieldCount = 0;
+        FMFieldList AttributeFields = NULL;
+        FMFormat AttributeFormat = NULL; // registered in CloseTimestep()
+        void *AttributeData = NULL;
+        int AttributeSize = 0;
+        int CompressZFP = 0;
+        attr_list ZFPParams = NULL;
+    };
+
+    FFSWriterMarshalBase Info;
+    void *MetadataBuf = NULL;
+    bool NewAttribute = false; // set by MarshalAttribute(), cleared by CloseTimestep()
+
+    size_t MetadataSize = 0;
+    BufferV *CurDataBuffer = NULL; // created on demand by Marshal()/CloseTimestep()
+    std::vector PreviousMetaMetaInfoBlocks;
+
+    BP5WriterRec LookupWriterRec(void *Key);
+    BP5WriterRec CreateWriterRec(void *Variable, const char *Name,
+                                 DataType Type, size_t ElemSize,
+                                 size_t DimCount);
+    void RecalcMarshalStorageSize();
+    void RecalcAttributeStorageSize();
+    void AddSimpleField(FMFieldList *FieldP, int *CountP, const char *Name,
+                        const char *Type, int ElementSize);
+    void AddField(FMFieldList *FieldP, int *CountP, const char *Name,
+                  const DataType Type, int ElementSize);
+    void AddFixedArrayField(FMFieldList *FieldP, int *CountP, const char *Name,
+                            const DataType Type, int ElementSize, int DimCount);
+    void AddVarArrayField(FMFieldList *FieldP, int *CountP, const char *Name,
+                          const DataType Type, int ElementSize,
+                          char *SizeField);
+    char *ConcatName(const char *base_name, const char *postfix);
+    char *BuildVarName(const char *base_name, const int type,
+                       const int element_size);
+    void BreakdownVarName(const char *Name, char **base_name_p, int *type_p,
+                          int *element_size_p);
+    char *BuildArrayDimsName(const char *base_name, const int type,
+                             const int element_size);
+    char *BuildArrayDBCountName(const char *base_name, const int type,
+                                const int element_size);
+    char *BuildArrayBlockCountName(const char *base_name, const int type,
+                                   const int element_size);
+    char
*TranslateADIOS2Type2FFS(const DataType Type);
+    size_t *CopyDims(const size_t Count, const size_t *Vals);
+    size_t *AppendDims(size_t *OldDims, const size_t OldCount,
+                       const size_t Count, const size_t *Vals);
+    size_t CalcSize(const size_t Count, const size_t *Vals);
+
+    typedef struct _ArrayRec
+    {
+        size_t ElemCount;
+        void *Array;
+    } ArrayRec;
+};
+
+} // end namespace format
+} // end namespace adios2
+
+#endif /* ADIOS2_TOOLKIT_FORMAT_BP5_BP5SERIALIZER_H_ */
diff --git a/source/adios2/toolkit/format/buffer/BufferV.cpp b/source/adios2/toolkit/format/buffer/BufferV.cpp
new file mode 100644
index 0000000000..494f30c29e
--- /dev/null
+++ b/source/adios2/toolkit/format/buffer/BufferV.cpp
@@ -0,0 +1,101 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0. See
+ * accompanying file Copyright.txt for details.
+ *
+ * BufferV.cpp
+ *
+ */
+
+#include "BufferV.h"
+#include
+
+namespace adios2
+{
+namespace format
+{
+
+BufferV::BufferV(const std::string type) : m_Type(type) {}
+
+/*
+ * Append `size` bytes at `buf` to the output vector, first zero-padding the
+ * stream so the block starts on a multiple of `align`.  With CopyReqd the
+ * bytes are copied into InternalBlock; otherwise only the caller's pointer
+ * is recorded.  Returns the stream offset at which the block starts (the
+ * padded stream size when `size` is 0).
+ */
+size_t BufferV::AddToVec(const size_t size, const void *buf, int align,
+                         bool CopyReqd)
+{
+    // align <= 1 means "no alignment"; the guard also keeps a zero align
+    // from reaching the modulo below.
+    if (align > 1)
+    {
+        const size_t misalign = CurOffset % static_cast<size_t>(align);
+        if (misalign)
+        {
+            // Pad in pieces no larger than the zero block, so an alignment
+            // above sizeof(zero) cannot read past the end of it.
+            static const char zero[16] = {0};
+            size_t padding = static_cast<size_t>(align) - misalign;
+            while (padding > 0)
+            {
+                const size_t chunk =
+                    (padding < sizeof(zero)) ? padding : sizeof(zero);
+                AddToVec(chunk, zero, 1, true);
+                padding -= chunk;
+            }
+        }
+    }
+    size_t retOffset = CurOffset;
+
+    if (size == 0)
+        return CurOffset;
+
+    if (!CopyReqd)
+    {
+        // just add buf to internal version of output vector
+        VecEntry entry = {true, buf, 0, size};
+        DataV.push_back(entry);
+    }
+    else
+    {
+        InternalBlock.Resize(m_internalPos + size, "");
+        memcpy(InternalBlock.Data() + m_internalPos, buf, size);
+        if (DataV.size() && !DataV.back().External &&
+            (m_internalPos == (DataV.back().Offset + DataV.back().Size)))
+        {
+            // just add to the size of the existing tail entry
+            DataV.back().Size += size;
+        }
+        else
+        {
+            DataV.push_back({false, NULL, m_internalPos, size});
+        }
+        m_internalPos += size;
+    }
+    CurOffset = retOffset + size;
+    return retOffset;
+}
+
+uint64_t BufferV::Size() noexcept {
return CurOffset; } + +BufferV::BufferV_iovec BufferV::DataVec() noexcept +{ + BufferV_iovec ret = new iovec[DataV.size() + 1]; + for (std::size_t i = 0; i < DataV.size(); ++i) + { + if (DataV[i].External) + { + ret[i].iov_base = DataV[i].Base; + } + else + { + ret[i].iov_base = InternalBlock.Data() + DataV[i].Offset; + } + ret[i].iov_len = DataV[i].Size; + } + ret[DataV.size()] = {NULL, 0}; + return ret; +} +} // end namespace format +} // end namespace adios2 diff --git a/source/adios2/toolkit/format/buffer/BufferV.h b/source/adios2/toolkit/format/buffer/BufferV.h new file mode 100644 index 0000000000..f64f342402 --- /dev/null +++ b/source/adios2/toolkit/format/buffer/BufferV.h @@ -0,0 +1,59 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. + * + */ + +#ifndef ADIOS2_TOOLKIT_FORMAT_BUFFER_BUFFERV_H_ +#define ADIOS2_TOOLKIT_FORMAT_BUFFER_BUFFERV_H_ + +#include "adios2/common/ADIOSConfig.h" +#include "adios2/common/ADIOSTypes.h" +#include "heap/BufferSTL.h" + +namespace adios2 +{ +namespace format +{ + +class BufferV +{ +public: + const std::string m_Type; + + typedef struct iovec + { + const void + *iov_base; // Base address of a memory region for input or output. + size_t iov_len; // The size of the memory pointed to by iov_base. 
+    } * BufferV_iovec;
+
+    uint64_t Size() noexcept;
+
+    BufferV(const std::string type);
+    virtual ~BufferV() = default;
+
+    virtual BufferV_iovec DataVec() noexcept;
+    // virtual const BufferV_iovec DataVec() const noexcept;
+
+    virtual size_t AddToVec(const size_t size, const void *buf, int align,
+                            bool CopyReqd);
+
+private:
+    struct VecEntry
+    {
+        bool External;
+        const void *Base;
+        size_t Offset;
+        size_t Size;
+    };
+    std::vector DataV;
+    size_t CurOffset = 0;
+    size_t m_internalPos = 0;
+    BufferSTL InternalBlock;
+};
+
+} // end namespace format
+} // end namespace adios2
+
+#endif /* ADIOS2_TOOLKIT_FORMAT_BUFFER_BUFFERV_H_ */
diff --git a/source/adios2/toolkit/format/buffer/ffs/BufferFFS.cpp b/source/adios2/toolkit/format/buffer/ffs/BufferFFS.cpp
new file mode 100644
index 0000000000..2268d40894
--- /dev/null
+++ b/source/adios2/toolkit/format/buffer/ffs/BufferFFS.cpp
@@ -0,0 +1,49 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0. See
+ * accompanying file Copyright.txt for details.
+ *
+ * BufferFFS.cpp
+ *
+ */
+
+#include "BufferFFS.h"
+#include
+#include
+
+namespace adios2
+{
+namespace format
+{
+
+// Takes ownership of Buf: it is released by Delete() or by the
+// destructor, whichever runs first.
+BufferFFS::BufferFFS(FFSBuffer Buf, void *data, size_t len)
+: Buffer("BufferFFS", len)
+{
+    m_buffer = Buf;
+    m_data = data;
+}
+
+BufferFFS::~BufferFFS() { BufferFFS::Delete(); }
+
+char *BufferFFS::Data() noexcept { return (char *)m_data; }
+
+const char *BufferFFS::Data() const noexcept { return (const char *)m_data; }
+
+// Release the FFS buffer.  Safe to call more than once, and before the
+// destructor runs: the handle is cleared after it is freed so it is never
+// passed to free_FFSBuffer() twice.
+void BufferFFS::Delete()
+{
+    if (m_buffer)
+    {
+        free_FFSBuffer(m_buffer);
+        m_buffer = NULL;
+    }
+    // presumably m_data points into the buffer just freed (CloseTimestep
+    // passes the FFSencode() result) -- TODO confirm; cleared so Data() does
+    // not return a stale address.
+    m_data = NULL;
+}
+} // end namespace format
+} // end namespace adios2
diff --git a/source/adios2/toolkit/format/buffer/ffs/BufferFFS.h b/source/adios2/toolkit/format/buffer/ffs/BufferFFS.h
new file mode 100644
index 0000000000..c0dc32d466
--- /dev/null
+++ b/source/adios2/toolkit/format/buffer/ffs/BufferFFS.h
@@ -0,0 +1,42 @@
+/*
+ * Distributed under the OSI-approved Apache License, Version 2.0. See
+ * accompanying file Copyright.txt for details.
+ * + * BufferFFS.h + * + * Created on: Sep 26, 2017 + * Author: William F Godoy godoywf@ornl.gov + */ + +#ifndef ADIOS2_TOOLKIT_FORMAT_BUFFER_HEAP_BUFFERFFS_H_ +#define ADIOS2_TOOLKIT_FORMAT_BUFFER_HEAP_BUFFERFFS_H_ + +#include "adios2/toolkit/format/buffer/Buffer.h" + +#include "adios2/common/ADIOSMacros.h" +#include "ffs.h" +#include "fm.h" + +namespace adios2 +{ +namespace format +{ + +class BufferFFS : public Buffer +{ +public: + FFSBuffer m_buffer = NULL; + void *m_data = NULL; + BufferFFS(FFSBuffer Buf, void *data, size_t length); + ~BufferFFS(); + + char *Data() noexcept final; + const char *Data() const noexcept final; + + void Delete(); +}; + +} // end namespace format +} // end namespace adios2 + +#endif /* ADIOS2_TOOLKIT_FORMAT_BUFFER_HEAP_BUFFERFFS_H_ */ diff --git a/source/adios2/toolkit/format/buffer/ffs/BufferSTL.tcc b/source/adios2/toolkit/format/buffer/ffs/BufferSTL.tcc new file mode 100644 index 0000000000..6b185f6b60 --- /dev/null +++ b/source/adios2/toolkit/format/buffer/ffs/BufferSTL.tcc @@ -0,0 +1,57 @@ +/* + * Distributed under the OSI-approved Apache License, Version 2.0. See + * accompanying file Copyright.txt for details. 
+ * + * BufferSTL.tcc + * + * Created on: Sep 18, 2019 + * Author: William F Godoy godoywf@ornl.gov + */ + +#ifndef ADIOS2_TOOLKIT_FORMAT_BUFFER_HEAP_BUFFERSTL_TCC_ +#define ADIOS2_TOOLKIT_FORMAT_BUFFER_HEAP_BUFFERSTL_TCC_ + +#include "BufferSTL.h" + +#include + +#ifdef _WIN32 +#pragma warning(disable : 4146) // Windows complains about unsigned minus +#endif + +namespace adios2 +{ +namespace format +{ + +template +size_t BufferSTL::Align() const noexcept +{ + // std::align implementation from llvm libc++ + // needed due to bug in gcc 4.8 + auto lf_align = [](const size_t alignment, const size_t size, void *&ptr, + size_t &space) { + if (size <= space) + { + const char *p1 = static_cast(ptr); + const char *p2 = reinterpret_cast( + reinterpret_cast(p1 + (alignment - 1)) & -alignment); + const size_t d = static_cast(p2 - p1); + if (d <= space - size) + { + space -= d; + } + } + }; + + void *currentAddress = reinterpret_cast( + const_cast(m_Buffer.data() + m_Position)); + size_t size = GetAvailableSize(); + lf_align(alignof(T), sizeof(T), currentAddress, size); + return GetAvailableSize() - size; +} + +} // end namespace format +} // end namespace adios2 + +#endif /* ADIOS2_TOOLKIT_FORMAT_BUFFER_HEAP_BUFFERSTL_TCC_ */ diff --git a/source/adios2/toolkit/sst/cp/cp_common.c b/source/adios2/toolkit/sst/cp/cp_common.c index 924db01ba1..c86cf86bc3 100644 --- a/source/adios2/toolkit/sst/cp/cp_common.c +++ b/source/adios2/toolkit/sst/cp/cp_common.c @@ -166,7 +166,7 @@ void CP_validateParams(SstStream Stream, SstParams Params, int Writer) } static char *SstRegStr[] = {"File", "Screen", "Cloud"}; -static char *SstMarshalStr[] = {"FFS", "BP"}; +static char *SstMarshalStr[] = {"FFS", "BP", "BP5"}; static char *SstQueueFullStr[] = {"Block", "Discard"}; static char *SstCompressStr[] = {"None", "ZFP"}; static char *SstCommPatternStr[] = {"Min", "Peer"}; diff --git a/source/adios2/toolkit/sst/cp/cp_internal.h b/source/adios2/toolkit/sst/cp/cp_internal.h index d8d2e9d9ab..2af2246344 
100644 --- a/source/adios2/toolkit/sst/cp/cp_internal.h +++ b/source/adios2/toolkit/sst/cp/cp_internal.h @@ -208,6 +208,11 @@ struct _SstStream int FinalTimestep; int CurrentWorkingTimestep; SstFullMetadata CurrentMetadata; + struct _SstMetaMetaBlockInternal *InternalMetaMetaInfo; + int InternalMetaMetaCount; + struct _SstBlock *InternalAttrDataInfo; + int AttrsRetrieved; + int InternalAttrDataCount; struct _SstParams *WriterConfigParams; void *ParamsBlock; int CommPatternLocked; diff --git a/source/adios2/toolkit/sst/cp/cp_reader.c b/source/adios2/toolkit/sst/cp/cp_reader.c index 3d0cd95436..a32f8813db 100644 --- a/source/adios2/toolkit/sst/cp/cp_reader.c +++ b/source/adios2/toolkit/sst/cp/cp_reader.c @@ -1,2139 +1,2275 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "adios2/common/ADIOSConfig.h" -#include -#include -#include - -#include "sst.h" - -#include "adios2/toolkit/profiling/taustubs/taustubs.h" -#include "cp_internal.h" - -#define gettid() pthread_self() -#ifdef MUTEX_DEBUG -#define STREAM_MUTEX_LOCK(Stream) \ - { \ - fprintf(stderr, "(PID %lx, TID %lx) CP_READER Trying lock line %d\n", \ - (long)getpid(), (long)gettid(), __LINE__); \ - pthread_mutex_lock(&Stream->DataLock); \ - Stream->Locked++; \ - fprintf(stderr, "(PID %lx, TID %lx) CP_READER Got lock\n", \ - (long)getpid(), (long)gettid()); \ - } - -#define STREAM_MUTEX_UNLOCK(Stream) \ - { \ - fprintf(stderr, "(PID %lx, TID %lx) CP_READER UNlocking line %d\n", \ - (long)getpid(), (long)gettid(), __LINE__); \ - Stream->Locked--; \ - pthread_mutex_unlock(&Stream->DataLock); \ - } -#define STREAM_CONDITION_WAIT(Stream) \ - { \ - fprintf( \ - stderr, \ - "(PID %lx, TID %lx) CP_READER Dropping Condition Lock line %d\n", \ - (long)getpid(), (long)gettid(), __LINE__); \ - Stream->Locked = 0; \ - pthread_cond_wait(&Stream->DataCondition, &Stream->DataLock); \ - fprintf( \ - stderr, \ - "(PID %lx, TID %lx) CP_READER Acquired 
Condition Lock line %d\n", \ - (long)getpid(), (long)gettid(), __LINE__); \ - Stream->Locked = 1; \ - } -#define STREAM_CONDITION_SIGNAL(Stream) \ - { \ - assert(Stream->Locked == 1); \ - fprintf(stderr, \ - "(PID %lx, TID %lx) CP_READER Signalling Condition line %d\n", \ - (long)getpid(), (long)gettid(), __LINE__); \ - pthread_cond_signal(&Stream->DataCondition); \ - } - -#define STREAM_ASSERT_LOCKED(Stream) \ - { \ - assert(Stream->Locked == 1); \ - } -#else -#define STREAM_MUTEX_LOCK(Stream) \ - { \ - pthread_mutex_lock(&Stream->DataLock); \ - } -#define STREAM_MUTEX_UNLOCK(Stream) \ - { \ - pthread_mutex_unlock(&Stream->DataLock); \ - } -#define STREAM_CONDITION_WAIT(Stream) \ - { \ - pthread_cond_wait(&Stream->DataCondition, &Stream->DataLock); \ - } -#define STREAM_CONDITION_SIGNAL(Stream) \ - { \ - pthread_cond_signal(&Stream->DataCondition); \ - } -#define STREAM_ASSERT_LOCKED(Stream) -#endif - -static char *readContactInfoFile(const char *Name, SstStream Stream, - int Timeout) -{ - size_t len = strlen(Name) + strlen(SST_POSTFIX) + 1; - char *FileName = malloc(len); - int Badfile = 0; - int ZeroCount = 0; - FILE *WriterInfo; - int64_t TimeoutRemaining = Timeout * 1000 * 1000; - int64_t WaitWarningRemaining = 5 * 1000 * 1000; - long SleepInterval = 100000; - snprintf(FileName, len, "%s" SST_POSTFIX, Name); - CP_verbose(Stream, PerRankVerbose, - "Looking for writer contact in file %s, with timeout %d secs\n", - FileName, Timeout); -redo: - WriterInfo = fopen(FileName, "r"); - while (!WriterInfo) - { - // CMusleep(Stream->CPInfo->cm, SleepInterval); - usleep(SleepInterval); - TimeoutRemaining -= SleepInterval; - WaitWarningRemaining -= SleepInterval; - if (WaitWarningRemaining == 0) - { - fprintf(stderr, - "ADIOS2 SST Engine waiting for contact information " - "file %s to be created\n", - Name); - } - if (TimeoutRemaining <= 0) - { - free(FileName); - return NULL; - } - WriterInfo = fopen(FileName, "r"); - } - struct stat Buf; - fstat(fileno(WriterInfo), 
&Buf); - int Size = Buf.st_size; - if (Size == 0) - { - // Try again, it might look zero momentarily, but shouldn't stay that - // way. - ZeroCount++; - if (ZeroCount < 5) - { - // We'll give it several attempts (and some time) to go non-zero - usleep(SleepInterval); - goto redo; - } - } - - if (Size < strlen(SSTMAGICV0)) - { - Badfile++; - } - else - { - char Tmp[strlen(SSTMAGICV0)]; - if (fread(Tmp, strlen(SSTMAGICV0), 1, WriterInfo) != 1) - { - fprintf(stderr, - "Filesystem read failed in SST Open, failing operation\n"); - fclose(WriterInfo); - Badfile++; - } - Size -= strlen(SSTMAGICV0); - if (strncmp(Tmp, SSTMAGICV0, strlen(SSTMAGICV0)) != 0) - { - Badfile++; - } - } - if (Badfile) - { - fprintf(stderr, - "!!! File %s is not an ADIOS2 SST Engine Contact file\n", - FileName); - free(FileName); - fclose(WriterInfo); - return NULL; - } - free(FileName); - char *Buffer = calloc(1, Size + 1); - if (fread(Buffer, Size, 1, WriterInfo) != 1) - { - fprintf(stderr, - "Filesystem read failed in SST Open, failing operation\n"); - free(Buffer); - fclose(WriterInfo); - return NULL; - } - fclose(WriterInfo); - return Buffer; -} - -static char *readContactInfoScreen(const char *Name, SstStream Stream) -{ - char Input[10240]; - char *Skip = Input; - fprintf(stdout, - "Please enter the contact information associated with SST " - "input stream \"%s\":\n", - Name); - if (fgets(Input, sizeof(Input), stdin) == NULL) - { - fprintf(stdout, "Read from stdin failed, exiting\n"); - exit(1); - } - while (isspace(*Skip)) - Skip++; - return strdup(Skip); -} - -static char *readContactInfo(const char *Name, SstStream Stream, int Timeout) -{ - switch (Stream->RegistrationMethod) - { - case SstRegisterFile: - return readContactInfoFile(Name, Stream, Timeout); - case SstRegisterScreen: - return readContactInfoScreen(Name, Stream); - case SstRegisterCloud: - /* not yet */ - return NULL; - } - return NULL; -} - -// ReaderConnCloseHandler is called by the network handler thread in -// response to 
the failure of a network connection to the writer. -extern void ReaderConnCloseHandler(CManager cm, CMConnection ClosedConn, - void *client_data) -{ - TAU_START_FUNC(); - SstStream Stream = (SstStream)client_data; - int FailedPeerRank = -1; - STREAM_MUTEX_LOCK(Stream); - CP_verbose(Stream, PerRankVerbose, "Reader-side close handler invoked\n"); - if ((Stream->Status == Destroyed) || (!Stream->ConnectionsToWriter)) - { - STREAM_MUTEX_UNLOCK(Stream); - return; - } - for (int i = 0; i < Stream->WriterCohortSize; i++) - { - if (Stream->ConnectionsToWriter[i].CMconn == ClosedConn) - { - FailedPeerRank = i; - } - } - - if (Stream->Status == Established) - { - if ((Stream->WriterConfigParams->CPCommPattern == SstCPCommMin) && - (Stream->Rank != 0)) - { - CP_verbose(Stream, PerRankVerbose, - "Reader-side Rank received a " - "connection-close event during normal " - "operations, but might be part of shutdown " - "Don't change stream status.\n"); - /* if this happens and *is* a failure, we'll get the status from - * rank 0 later */ - } - else - { - /* - * tag our reader instance as failed, IFF this came from someone we - * should have gotten a CLOSE from. I.E. a reverse peer - */ - CP_verbose(Stream, PerRankVerbose, - "Reader-side Rank received a " - "connection-close event during normal " - "operations, peer likely failed\n"); - if (FailedPeerRank == Stream->FailureContactRank) - { - Stream->Status = PeerFailed; - STREAM_CONDITION_SIGNAL(Stream); - } - } - CP_verbose( - Stream, PerRankVerbose, - "The close was for connection to writer peer %d, notifying DP\n", - FailedPeerRank); - STREAM_MUTEX_UNLOCK(Stream); - /* notify DP of failure. This should terminate any waits currently - * pending in the DP for that rank */ - Stream->DP_Interface->notifyConnFailure(&Svcs, Stream->DP_Stream, - FailedPeerRank); - } - else if (Stream->Status == PeerClosed) - { - /* ignore this. 
We expect a close after the connection is marked closed - */ - CP_verbose(Stream, PerRankVerbose, - "Reader-side Rank received a " - "connection-close event after close, " - "not unexpected\n"); - STREAM_MUTEX_UNLOCK(Stream); - // Don't notify DP, because this is part of normal shutdown and we don't - // want to kill pending reads - } - else if (Stream->Status == PeerFailed) - { - CP_verbose( - Stream, PerRankVerbose, - "Reader-side Rank received a " - "connection-close event after PeerFailed, already notified DP \n"); - // Don't notify DP, because we already have */ - STREAM_MUTEX_UNLOCK(Stream); - } - else - { - CP_verbose(Stream, CriticalVerbose, - "Got an unexpected connection close event\n"); - CP_verbose(Stream, PerStepVerbose, - "Reader-side Rank received a " - "connection-close event in unexpected " - "status %s\n", - SSTStreamStatusStr[Stream->Status]); - STREAM_MUTEX_UNLOCK(Stream); - } - TAU_STOP_FUNC(); -} - -// SstCurrentStep is only called by the main program thread and -// needs no locking as it only accesses data set by the main thread -extern long SstCurrentStep(SstStream Stream) { return Stream->ReaderTimestep; } - -static void releasePriorTimesteps(SstStream Stream, long Latest); -static void sendOneToEachWriterRank(SstStream s, CMFormat f, void *Msg, - void **WS_StreamPtr); - -static void **ParticipateInReaderInitDataExchange(SstStream Stream, - void *dpInfo, - void **ret_data_block) -{ - - struct _CP_DP_PairInfo combined_init; - struct _CP_ReaderInitInfo cpInfo; - - struct _CP_DP_PairInfo **pointers; - - cpInfo.ContactInfo = CP_GetContactString(Stream, NULL); - cpInfo.ReaderID = Stream; - - combined_init.CP_Info = (void **)&cpInfo; - combined_init.DP_Info = dpInfo; - - pointers = (struct _CP_DP_PairInfo **)CP_consolidateDataToRankZero( - Stream, &combined_init, Stream->CPInfo->PerRankReaderInfoFormat, - ret_data_block); - free(cpInfo.ContactInfo); - return (void **)pointers; -} - -static int HasAllPeers(SstStream Stream) -{ - int i, 
StillWaiting = 0; - if (!Stream->ConnectionsToWriter) - { - CP_verbose(Stream, PerRankVerbose, - "(PID %lx, TID %lx) Waiting for first Peer notification\n", - (long)gettid(), (long)getpid()); - return 0; - } - i = 0; - while (Stream->Peers[i] != -1) - { - int peer = Stream->Peers[i]; - if (Stream->ConnectionsToWriter[peer].CMconn == NULL) - StillWaiting++; - i++; - } - if (StillWaiting == 0) - { - CP_verbose(Stream, PerRankVerbose, - "Rank %d has all forward peer connections\n", Stream->Rank); - return 1; - } - else - { - CP_verbose(Stream, PerRankVerbose, - "Rank %d waiting for %d forward peer connections\n", - Stream->Rank, StillWaiting); - return 0; - } -} - -attr_list ContactWriter(SstStream Stream, char *Filename, SstParams Params, - SMPI_Comm comm, CMConnection *conn_p, - void **WriterFileID_p) -{ - int DataSize = 0; - attr_list RetVal = NULL; - - if (Stream->Rank == 0) - { - char *Writer0Contact = - readContactInfo(Filename, Stream, Params->OpenTimeoutSecs); - char *CMContactString = NULL; - CMConnection conn = NULL; - attr_list WriterRank0Contact; - - if (Writer0Contact) - { - - CMContactString = - malloc(strlen(Writer0Contact)); /* at least long enough */ - sscanf(Writer0Contact, "%p:%s", WriterFileID_p, CMContactString); - // printf("Writer contact info is fileID %p, contact info - // %s\n", - // WriterFileID, CMContactString); - free(Writer0Contact); - - if (globalNetinfoCallback) - { - (globalNetinfoCallback)(1, CP_GetContactString(Stream, NULL), - IPDiagString); - (globalNetinfoCallback)(2, CMContactString, NULL); - } - WriterRank0Contact = attr_list_from_string(CMContactString); - conn = CMget_conn(Stream->CPInfo->SharedCM->cm, WriterRank0Contact); - free_attr_list(WriterRank0Contact); - } - if (conn) - { - DataSize = strlen(CMContactString) + 1; - *conn_p = conn; - } - else - { - DataSize = 0; - *conn_p = NULL; - } - SMPI_Bcast(&DataSize, 1, SMPI_INT, 0, Stream->mpiComm); - if (DataSize != 0) - { - SMPI_Bcast(CMContactString, DataSize, SMPI_CHAR, 0, 
- Stream->mpiComm); - RetVal = attr_list_from_string(CMContactString); - } - if (CMContactString) - free(CMContactString); - } - else - { - SMPI_Bcast(&DataSize, 1, SMPI_INT, 0, Stream->mpiComm); - if (DataSize != 0) - { - char *Buffer = malloc(DataSize); - SMPI_Bcast(Buffer, DataSize, SMPI_CHAR, 0, Stream->mpiComm); - RetVal = attr_list_from_string(Buffer); - free(Buffer); - } - } - return RetVal; -} - -// SstReaderOpen is an SST reader entry point, called only by the -// main program thread It must be called by all ranks, and as it -// creates the only shared data structure, no locking is necessary -// prior to the CMCondition_wait() that is triggered in response to -// reader regsitration. -SstStream SstReaderOpen(const char *Name, SstParams Params, SMPI_Comm comm) -{ - SstStream Stream; - void *dpInfo; - struct _CP_DP_PairInfo **pointers; - void *data_block; - void *free_block; - writer_data_t ReturnData; - struct _ReaderActivateMsg Msg; - struct timeval Start, Stop, Diff; - char *Filename = strdup(Name); - CMConnection rank0_to_rank0_conn = NULL; - void *WriterFileID; - - Stream = CP_newStream(); - Stream->Role = ReaderRole; - Stream->mpiComm = comm; - - SMPI_Comm_rank(Stream->mpiComm, &Stream->Rank); - SMPI_Comm_size(Stream->mpiComm, &Stream->CohortSize); - - CP_validateParams(Stream, Params, 0 /* reader */); - Stream->ConfigParams = Params; - - Stream->DP_Interface = - SelectDP(&Svcs, Stream, Stream->ConfigParams, Stream->Rank); - - Stream->CPInfo = - CP_getCPInfo(Stream->DP_Interface, Stream->ConfigParams->ControlModule); - - Stream->FinalTimestep = INT_MAX; /* set this on close */ - Stream->LastDPNotifiedTimestep = -1; - - gettimeofday(&Start, NULL); - - attr_list WriterContactAttributes = ContactWriter( - Stream, Filename, Params, comm, &rank0_to_rank0_conn, &WriterFileID); - - if (WriterContactAttributes == NULL) - { - SstStreamDestroy(Stream); - free(Stream); - free(Filename); - return NULL; - } - - Stream->DP_Stream = Stream->DP_Interface->initReader( 
- &Svcs, Stream, &dpInfo, Stream->ConfigParams, WriterContactAttributes, - &Stream->Stats); - - free_attr_list(WriterContactAttributes); - - pointers = (struct _CP_DP_PairInfo **)ParticipateInReaderInitDataExchange( - Stream, dpInfo, &data_block); - - if (Stream->Rank == 0) - { - struct _CombinedWriterInfo WriterData; - struct _ReaderRegisterMsg ReaderRegister; - - memset(&ReaderRegister, 0, sizeof(ReaderRegister)); - memset(&WriterData, 0, sizeof(WriterData)); - WriterData.WriterCohortSize = -1; - ReaderRegister.WriterFile = WriterFileID; - ReaderRegister.WriterResponseCondition = - CMCondition_get(Stream->CPInfo->SharedCM->cm, rank0_to_rank0_conn); - ReaderRegister.ReaderCohortSize = Stream->CohortSize; - switch (Stream->ConfigParams->SpeculativePreloadMode) - { - case SpecPreloadOff: - case SpecPreloadOn: - ReaderRegister.SpecPreload = - (SpeculativePreloadMode) - Stream->ConfigParams->SpeculativePreloadMode; - break; - case SpecPreloadAuto: - ReaderRegister.SpecPreload = SpecPreloadOff; - if (Stream->CohortSize <= - Stream->ConfigParams->SpecAutoNodeThreshold) - { - ReaderRegister.SpecPreload = SpecPreloadOn; - } - break; - } - - ReaderRegister.CP_ReaderInfo = - malloc(ReaderRegister.ReaderCohortSize * sizeof(void *)); - ReaderRegister.DP_ReaderInfo = - malloc(ReaderRegister.ReaderCohortSize * sizeof(void *)); - for (int i = 0; i < ReaderRegister.ReaderCohortSize; i++) - { - ReaderRegister.CP_ReaderInfo[i] = - (CP_ReaderInitInfo)pointers[i]->CP_Info; - ReaderRegister.DP_ReaderInfo[i] = pointers[i]->DP_Info; - } - free(pointers); - - /* the response value is set in the handler */ - volatile struct _WriterResponseMsg *response = NULL; - CMCondition_set_client_data(Stream->CPInfo->SharedCM->cm, - ReaderRegister.WriterResponseCondition, - &response); - - if (CMwrite(rank0_to_rank0_conn, - Stream->CPInfo->SharedCM->ReaderRegisterFormat, - &ReaderRegister) != 1) - { - CP_verbose(Stream, CriticalVerbose, - "Message failed to send to writer in SstReaderOpen\n"); - } - 
free(ReaderRegister.CP_ReaderInfo); - free(ReaderRegister.DP_ReaderInfo); - - /* wait for "go" from writer */ - CP_verbose( - Stream, PerRankVerbose, - "Waiting for writer response message in SstReadOpen(\"%s\")\n", - Filename, ReaderRegister.WriterResponseCondition); - CMCondition_wait(Stream->CPInfo->SharedCM->cm, - ReaderRegister.WriterResponseCondition); - CP_verbose(Stream, PerRankVerbose, - "finished wait writer response message in read_open\n"); - - if (response) - { - WriterData.WriterCohortSize = response->WriterCohortSize; - WriterData.WriterConfigParams = response->WriterConfigParams; - WriterData.StartingStepNumber = response->NextStepNumber; - WriterData.CP_WriterInfo = response->CP_WriterInfo; - WriterData.DP_WriterInfo = response->DP_WriterInfo; - } - ReturnData = CP_distributeDataFromRankZero( - Stream, &WriterData, Stream->CPInfo->CombinedWriterInfoFormat, - &free_block); - } - else - { - ReturnData = CP_distributeDataFromRankZero( - Stream, NULL, Stream->CPInfo->CombinedWriterInfoFormat, - &free_block); - } - - free(data_block); - - if (ReturnData->WriterCohortSize == -1) - { - /* Rank 0 found no writer at that contact point, fail the stream */ - free(free_block); - return NULL; - } - - if (Stream->Rank == 0) - { - CP_verbose(Stream, SummaryVerbose, - "Opening Reader Stream.\nWriter stream params are:\n"); - CP_dumpParams(Stream, ReturnData->WriterConfigParams, - 0 /* writer side */); - CP_verbose(Stream, SummaryVerbose, "Reader stream params are:\n"); - CP_dumpParams(Stream, Stream->ConfigParams, 1 /* reader side */); - } - - // printf("I am reader rank %d, my info on writers is:\n", Stream->Rank); - // FMdump_data(FMFormat_of_original(Stream->CPInfo->combined_writer_Format), - // ReturnData, 1024000); - // printf("\n"); - - Stream->WriterCohortSize = ReturnData->WriterCohortSize; - Stream->WriterConfigParams = ReturnData->WriterConfigParams; - if ((Stream->WriterConfigParams->MarshalMethod == SstMarshalFFS) && - (Stream->Rank == 0)) - { - 
CP_verbose(Stream, SummaryVerbose, - "Writer is doing FFS-based marshalling\n"); - } - if ((Stream->WriterConfigParams->MarshalMethod == SstMarshalBP) && - (Stream->Rank == 0)) - { - CP_verbose(Stream, SummaryVerbose, - "Writer is doing BP-based marshalling\n"); - } - if ((Stream->WriterConfigParams->CPCommPattern == SstCPCommMin) && - (Stream->Rank == 0)) - { - CP_verbose( - Stream, SummaryVerbose, - "Writer is using Minimum Connection Communication pattern (min)\n"); - } - if ((Stream->WriterConfigParams->CPCommPattern == SstCPCommPeer) && - (Stream->Rank == 0)) - { - CP_verbose(Stream, SummaryVerbose, - "Writer is using Peer-based Communication pattern (peer)\n"); - } - STREAM_MUTEX_LOCK(Stream); - Stream->ReaderTimestep = ReturnData->StartingStepNumber - 1; - - if (Stream->WriterConfigParams->CPCommPattern == SstCPCommPeer) - { - /* - * Wait for connections and messages from writer side peers - */ - getPeerArrays(Stream->CohortSize, Stream->Rank, - Stream->WriterCohortSize, &Stream->Peers, NULL); - - while (!HasAllPeers(Stream)) - { - /* wait until we get the timestep metadata or something else changes - */ - STREAM_CONDITION_WAIT(Stream); - } - } - else - { - if (!Stream->ConnectionsToWriter) - { - Stream->ConnectionsToWriter = - calloc(sizeof(CP_PeerConnection), ReturnData->WriterCohortSize); - } - } - - for (int i = 0; i < ReturnData->WriterCohortSize; i++) - { - attr_list attrs = - attr_list_from_string(ReturnData->CP_WriterInfo[i]->ContactInfo); - Stream->ConnectionsToWriter[i].ContactList = attrs; - Stream->ConnectionsToWriter[i].RemoteStreamID = - ReturnData->CP_WriterInfo[i]->WriterID; - } - - // Deref the original connection to writer rank 0 (might still be open as a - // peer) - if (Stream->WriterConfigParams->CPCommPattern == SstCPCommPeer) - { - if (rank0_to_rank0_conn) - { - CMConnection_dereference(rank0_to_rank0_conn); - } - } - else - { - /* only rely on the rank 0 to rank 0 that we already have (if we're rank - * 0) */ - if 
(rank0_to_rank0_conn) - { - CMConnection conn = rank0_to_rank0_conn; - Stream->ConnectionsToWriter[0].CMconn = conn; - CMconn_register_close_handler(conn, ReaderConnCloseHandler, - (void *)Stream); - } - } - Stream->Status = Established; - gettimeofday(&Stop, NULL); - timersub(&Stop, &Start, &Diff); - Stream->OpenTimeSecs = (double)Diff.tv_usec / 1e6 + Diff.tv_sec; - gettimeofday(&Stream->ValidStartTime, NULL); - Stream->Filename = Filename; - Stream->ParamsBlock = free_block; - STREAM_MUTEX_UNLOCK(Stream); - AddToLastCallFreeList(Stream); - Stream->DP_Interface->provideWriterDataToReader( - &Svcs, Stream->DP_Stream, ReturnData->WriterCohortSize, - Stream->ConnectionsToWriter, ReturnData->DP_WriterInfo); - CP_verbose(Stream, PerRankVerbose, - "Sending Reader Activate messages to writer\n"); - memset(&Msg, 0, sizeof(Msg)); - sendOneToEachWriterRank(Stream, - Stream->CPInfo->SharedCM->ReaderActivateFormat, - &Msg, &Msg.WSR_Stream); - CP_verbose(Stream, PerStepVerbose, - "Finish opening Stream \"%s\", starting with Step number %d\n", - Filename, ReturnData->StartingStepNumber); - - return Stream; -} - -// SstReaderGetParams is an SST entry point only called by the main -// program thread. It can only be called after initialization and -// only accesses data installed durinig initialization, it needs no -// locking. 
-extern void SstReaderGetParams(SstStream Stream, - SstMarshalMethod *WriterMarshalMethod) -{ - *WriterMarshalMethod = - (SstMarshalMethod)Stream->WriterConfigParams->MarshalMethod; -} - -/* - * CP_PeerSetupHandler is called by the network handler thread in - * response to incoming PeerSetup messages to setup the reader-side - * Peer list - */ -extern void CP_PeerSetupHandler(CManager cm, CMConnection conn, void *Msg_v, - void *client_data, attr_list attrs) -{ - TAU_START_FUNC(); - SstStream Stream; - struct _PeerSetupMsg *Msg = (struct _PeerSetupMsg *)Msg_v; - Stream = (SstStream)Msg->RS_Stream; - STREAM_MUTEX_LOCK(Stream); - CP_verbose(Stream, TraceVerbose, - "Received peer setup from rank %d, conn %p\n", Msg->WriterRank, - conn); - if (!Stream->ConnectionsToWriter) - { - CP_verbose(Stream, TraceVerbose, "Allocating connections to writer\n"); - Stream->ConnectionsToWriter = - calloc(sizeof(CP_PeerConnection), Msg->WriterCohortSize); - } - CP_verbose(Stream, TraceVerbose, - "Received peer setup from rank %d, conn %p\n", Msg->WriterRank, - conn); - if (Msg->WriterRank != -1) - { - Stream->ConnectionsToWriter[Msg->WriterRank].CMconn = conn; - CMConnection_add_reference(conn); - Stream->FailureContactRank = Msg->WriterRank; - } - CMconn_register_close_handler(conn, ReaderConnCloseHandler, (void *)Stream); - STREAM_CONDITION_SIGNAL(Stream); - STREAM_MUTEX_UNLOCK(Stream); - TAU_STOP_FUNC(); -} - -void queueTimestepMetadataMsgAndNotify(SstStream Stream, - struct _TimestepMetadataMsg *tsm, - CMConnection conn) -{ - STREAM_ASSERT_LOCKED(Stream); - if (tsm->Timestep < Stream->DiscardPriorTimestep) - { - struct _ReleaseTimestepMsg Msg; - memset(&Msg, 0, sizeof(Msg)); - Msg.Timestep = tsm->Timestep; - - /* - * send each writer rank a release for this timestep (actually goes to - * WSR Streams) - */ - if (tsm->Metadata != NULL) - { - CP_verbose(Stream, PerStepVerbose, - "Sending ReleaseTimestep message for PRIOR DISCARD " - "timestep %d, one to each writer\n", - 
tsm->Timestep); - sendOneToEachWriterRank( - Stream, Stream->CPInfo->SharedCM->ReleaseTimestepFormat, &Msg, - &Msg.WSR_Stream); - } - else - { - CP_verbose(Stream, PerStepVerbose, - "Received discard notice for timestep %d, " - "ignoring in PRIOR DISCARD\n", - tsm->Timestep); - } - } - - struct _TimestepMetadataList *New = malloc(sizeof(struct _RequestQueue)); - New->MetadataMsg = tsm; - New->Next = NULL; - if (Stream->Timesteps) - { - struct _TimestepMetadataList *Last = Stream->Timesteps; - while (Last->Next) - { - Last = Last->Next; - } - Last->Next = New; - } - else - { - Stream->Timesteps = New; - } - Stream->Stats.TimestepMetadataReceived++; - if (tsm->Metadata) - { - Stream->Stats.MetadataBytesReceived += - (tsm->Metadata->DataSize + tsm->AttributeData->DataSize); - } - CP_verbose(Stream, PerRankVerbose, - "Received a Timestep metadata message for timestep %d, " - "signaling condition\n", - tsm->Timestep); - - STREAM_CONDITION_SIGNAL(Stream); - if ((Stream->Rank == 0) && - (Stream->WriterConfigParams->CPCommPattern == SstCPCommMin) && - (Stream->ConfigParams->AlwaysProvideLatestTimestep)) - { - /* - * IFF we are in CommMin mode, AND we are to always provide - * the newest timestep, then when a new timestep arrives then - * we want to release timesteps that are older than it, NOT - * INCLUDING ANY TIMESTEP IN CURRENT USE. 
- */ - CP_verbose(Stream, TraceVerbose, - "Got a new timestep in AlwaysProvideLatestTimestep mode, " - "discard older than %d\n", - tsm->Timestep); - releasePriorTimesteps(Stream, tsm->Timestep); - } -} - -// CP_TimestepMetadataHandler is called by the network handler thread -// to handle incoming TimestepMetadata messages -void CP_TimestepMetadataHandler(CManager cm, CMConnection conn, void *Msg_v, - void *client_data, attr_list attrs) -{ - TAU_START_FUNC(); - SstStream Stream; - struct _TimestepMetadataMsg *Msg = (struct _TimestepMetadataMsg *)Msg_v; - Stream = (SstStream)Msg->RS_Stream; - STREAM_MUTEX_LOCK(Stream); - if ((Stream->Rank != 0) || - (Stream->WriterConfigParams->CPCommPattern == SstCPCommPeer)) - { - /* All ranks are getting this */ - if (Msg->Metadata == NULL) - { - CP_verbose( - Stream, PerRankVerbose, - "Received a message that timestep %d has been discarded\n", - Msg->Timestep); - - /* - * before discarding, install any precious metadata from this - * message - */ - if (Stream->WriterConfigParams->MarshalMethod == SstMarshalFFS) - { - FFSMarshalInstallPreciousMetadata(Stream, Msg); - } - STREAM_MUTEX_UNLOCK(Stream); - - return; - } - else - { - CP_verbose( - Stream, PerStepVerbose, - "Received an incoming metadata message for timestep %d\n", - Msg->Timestep); - } - /* arrange for this message data to stay around */ - CMtake_buffer(cm, Msg); - - queueTimestepMetadataMsgAndNotify(Stream, Msg, conn); - } - else - { - /* I must be rank 0 and only I got this, I'll need to distribute it to - * everyone */ - /* arrange for this message data to stay around */ - CMtake_buffer(cm, Msg); - - queueTimestepMetadataMsgAndNotify(Stream, Msg, conn); - } - STREAM_MUTEX_UNLOCK(Stream); - TAU_STOP_FUNC(); -} - -// CP_WriterResponseHandler is called by the network handler thread to -// handle WriterResponse messages. One of these will be sent to rank0 -// reader from rank0 writer in response to the ReaderRegister message. 
-// It will find rank0 writer in CMCondition_wait(). It's only action -// is to associate the incoming response message to the CMcondition -// we're waiting on,m so no locking is necessary. -void CP_WriterResponseHandler(CManager cm, CMConnection conn, void *Msg_v, - void *client_data, attr_list attrs) -{ - TAU_REGISTER_THREAD(); - TAU_START_FUNC(); - struct _WriterResponseMsg *Msg = (struct _WriterResponseMsg *)Msg_v; - struct _WriterResponseMsg **response_ptr; - // fprintf(stderr, "Received a writer_response message for condition - // %d\n", - // Msg->WriterResponseCondition); - // fprintf(stderr, "The responding writer has cohort of size %d :\n", - // Msg->writer_CohortSize); - // for (int i = 0; i < Msg->writer_CohortSize; i++) { - // fprintf(stderr, " rank %d CP contact info: %s, %p\n", i, - // Msg->CP_WriterInfo[i]->ContactInfo, - // Msg->CP_WriterInfo[i]->WriterID); - // } - - /* arrange for this message data to stay around */ - CMtake_buffer(cm, Msg); - - /* attach the message to the CMCondition so it an be retrieved by the main - * thread */ - response_ptr = - CMCondition_get_client_data(cm, Msg->WriterResponseCondition); - *response_ptr = Msg; - - /* wake the main thread */ - CMCondition_signal(cm, Msg->WriterResponseCondition); - TAU_STOP_FUNC(); -} - -// CP_WriterCloseHandler is called by the network handler thread to -// handle WriterResponse messages. One of these will be sent to rank0 -// reader from rank0 writer in response to the ReaderRegister message. -// It will find rank0 writer in CMCondition_wait(). It's only action -// is to associate the incoming response message to the CMcondition -// we're waiting on, so no locking is necessary. 
-extern void CP_WriterCloseHandler(CManager cm, CMConnection conn, void *Msg_v, - void *client_data, attr_list attrs) -{ - TAU_START_FUNC(); - WriterCloseMsg Msg = (WriterCloseMsg)Msg_v; - SstStream Stream = (SstStream)Msg->RS_Stream; - - STREAM_MUTEX_LOCK(Stream); - CP_verbose(Stream, PerStepVerbose, - "Received a writer close message. " - "Timestep %d was the final timestep.\n", - Msg->FinalTimestep); - - Stream->FinalTimestep = Msg->FinalTimestep; - Stream->Status = PeerClosed; - /* wake anyone that might be waiting */ - STREAM_CONDITION_SIGNAL(Stream); - STREAM_MUTEX_UNLOCK(Stream); - TAU_STOP_FUNC(); -} - -// CP_CommPatternLockedHandler is called by the network handler thread -// to handle CommPatternLocked messages. It can only be called -// post-registration and won't be called after Close. Lock to protect -// against race conditions in determining comm lock scenario. -extern void CP_CommPatternLockedHandler(CManager cm, CMConnection conn, - void *Msg_v, void *client_data, - attr_list attrs) -{ - CommPatternLockedMsg Msg = (CommPatternLockedMsg)Msg_v; - SstStream Stream = (SstStream)Msg->RS_Stream; - - STREAM_MUTEX_LOCK(Stream); - CP_verbose( - Stream, PerStepVerbose, - "Received a CommPatternLocked message, beginning with Timestep %d.\n", - Msg->Timestep); - - Stream->CommPatternLocked = 1; - Stream->CommPatternLockedTimestep = Msg->Timestep; - STREAM_MUTEX_UNLOCK(Stream); -} - -static long MaxQueuedMetadata(SstStream Stream) -{ - struct _TimestepMetadataList *Next; - long MaxTimestep = -1; - STREAM_ASSERT_LOCKED(Stream); - Next = Stream->Timesteps; - if (Next == NULL) - { - CP_verbose(Stream, TraceVerbose, "MaxQueued Timestep returning -1\n"); - return -1; - } - while (Next) - { - if (Next->MetadataMsg->Timestep >= MaxTimestep) - { - MaxTimestep = Next->MetadataMsg->Timestep; - } - Next = Next->Next; - } - CP_verbose(Stream, TraceVerbose, "MaxQueued Timestep returning %ld\n", - MaxTimestep); - return MaxTimestep; -} - -static long 
NextQueuedMetadata(SstStream Stream) -{ - struct _TimestepMetadataList *Next; - long MinTimestep = LONG_MAX; - STREAM_ASSERT_LOCKED(Stream); - Next = Stream->Timesteps; - if (Next == NULL) - { - CP_verbose(Stream, TraceVerbose, "NextQueued Timestep returning -1\n"); - return -1; - } - while (Next) - { - if (Next->MetadataMsg->Timestep <= MinTimestep) - { - MinTimestep = Next->MetadataMsg->Timestep; - } - Next = Next->Next; - } - CP_verbose(Stream, TraceVerbose, "NextQueued Timestep returning %ld\n", - MinTimestep); - return MinTimestep; -} - -// A delayed task to wake the stream after a specific time period -static void triggerDataCondition(CManager cm, void *vStream) -{ - SstStream Stream = (SstStream)vStream; - - STREAM_MUTEX_LOCK(Stream); - /* wake the sleeping main thread for timeout */ - STREAM_CONDITION_SIGNAL(Stream); - STREAM_MUTEX_UNLOCK(Stream); -} - -static void waitForMetadataWithTimeout(SstStream Stream, float timeout_secs) -{ - struct _TimestepMetadataList *Next; - struct timeval start, now, end; - int timeout_int_sec = floor(timeout_secs); - int timeout_int_usec = ((timeout_secs - floorf(timeout_secs)) * 1000000); - CMTaskHandle TimeoutTask = NULL; - - STREAM_ASSERT_LOCKED(Stream); - gettimeofday(&start, NULL); - Next = Stream->Timesteps; - CP_verbose( - Stream, PerRankVerbose, - "Wait for metadata with timeout %g secs starting at time %ld.%06ld \n", - timeout_secs, start.tv_sec, start.tv_usec); - if (Next) - { - CP_verbose(Stream, PerRankVerbose, - "Returning from wait with timeout, NO TIMEOUT\n"); - } - end.tv_sec = start.tv_sec + timeout_int_sec; - end.tv_usec = start.tv_usec + timeout_int_usec; - if (end.tv_usec > 1000000) - { - end.tv_sec++; - end.tv_usec -= 1000000; - } - if (end.tv_sec < start.tv_sec) - { - // rollover - end.tv_sec = INT_MAX; - } - // special case - if (timeout_secs == 0.0) - { - CP_verbose( - Stream, PerRankVerbose, - "Returning from wait With no data after zero timeout poll\n"); - return; - } - - TimeoutTask = - 
CMadd_delayed_task(Stream->CPInfo->SharedCM->cm, timeout_int_sec, - timeout_int_usec, triggerDataCondition, Stream); - while (1) - { - Next = Stream->Timesteps; - if (Next) - { - CMremove_task(TimeoutTask); - CP_verbose(Stream, PerRankVerbose, - "Returning from wait with timeout, NO TIMEOUT\n"); - return; - } - if (Stream->Status != Established) - { - CP_verbose(Stream, PerRankVerbose, - "Returning from wait with timeout, STREAM NO " - "LONGER ESTABLISHED\n"); - return; - } - gettimeofday(&now, NULL); - CP_verbose(Stream, TraceVerbose, - "timercmp, now is %ld.%06ld end is %ld.%06ld \n", - now.tv_sec, now.tv_usec, end.tv_sec, end.tv_usec); - if (timercmp(&now, &end, >)) - { - CP_verbose(Stream, PerRankVerbose, - "Returning from wait after timing out\n"); - return; - } - /* wait until we get the timestep metadata or something else changes */ - STREAM_CONDITION_WAIT(Stream); - } - /* NOTREACHED */ -} - -static void releasePriorTimesteps(SstStream Stream, long Latest) -{ - struct _TimestepMetadataList *Next, *Last; - STREAM_ASSERT_LOCKED(Stream); - CP_verbose(Stream, PerRankVerbose, - "Releasing any timestep earlier than %d\n", Latest); - Next = Stream->Timesteps; - Last = NULL; - while (Next) - { - if ((Next->MetadataMsg->Timestep < Latest) && - (Next->MetadataMsg->Timestep != Stream->CurrentWorkingTimestep)) - { - struct _TimestepMetadataList *This = Next; - struct _ReleaseTimestepMsg Msg; - Next = This->Next; - - /* - * before discarding, install any precious metadata from this - * message - */ - if (Stream->WriterConfigParams->MarshalMethod == SstMarshalFFS) - { - FFSMarshalInstallPreciousMetadata(Stream, This->MetadataMsg); - } - - memset(&Msg, 0, sizeof(Msg)); - Msg.Timestep = This->MetadataMsg->Timestep; - - /* - * send each writer rank a release for this timestep (actually goes - * to WSR - * Streams) - */ - CP_verbose(Stream, PerRankVerbose, - "Sending ReleaseTimestep message for RELEASE " - "PRIOR timestep %d, one to each writer\n", - 
This->MetadataMsg->Timestep); - - if (Last == NULL) - { - Stream->Timesteps = Next; - } - else - { - Last->Next = Next; - } - STREAM_MUTEX_UNLOCK(Stream); - sendOneToEachWriterRank( - Stream, Stream->CPInfo->SharedCM->ReleaseTimestepFormat, &Msg, - &Msg.WSR_Stream); - if (This->MetadataMsg == NULL) - printf("READER RETURN_BUFFER, metadatamsg == %p, line %d\n", - This->MetadataMsg, __LINE__); - CMreturn_buffer(Stream->CPInfo->SharedCM->cm, This->MetadataMsg); - STREAM_MUTEX_LOCK(Stream); - free(This); - } - else - { - Last = Next; - Next = Next->Next; - } - } -} - -static void FreeTimestep(SstStream Stream, long Timestep) -{ - /* - * remove local metadata for that timestep - */ - struct _TimestepMetadataList *List = Stream->Timesteps; - - STREAM_ASSERT_LOCKED(Stream); - if (Stream->Timesteps->MetadataMsg->Timestep == Timestep) - { - Stream->Timesteps = List->Next; - if (List->MetadataMsg == NULL) - printf("READER RETURN_BUFFER, List->MEtadataMsg == %p, line %d\n", - List->MetadataMsg, __LINE__); - CMreturn_buffer(Stream->CPInfo->SharedCM->cm, List->MetadataMsg); - - free(List); - } - else - { - struct _TimestepMetadataList *last = List; - List = List->Next; - while (List != NULL) - { - if (List->MetadataMsg->Timestep == Timestep) - { - last->Next = List->Next; - if (List->MetadataMsg == NULL) - printf("READER RETURN_BUFFER, List->MEtadataMsg == %p, " - "line %d\n", - List->MetadataMsg, __LINE__); - CMreturn_buffer(Stream->CPInfo->SharedCM->cm, - List->MetadataMsg); - - free(List); - break; - } - last = List; - List = List->Next; - } - } -} - -static TSMetadataList waitForNextMetadata(SstStream Stream, long LastTimestep) -{ - TSMetadataList FoundTS = NULL; - CP_verbose(Stream, PerRankVerbose, - "Wait for next metadata after last timestep %d\n", LastTimestep); - while (1) - { - struct _TimestepMetadataList *Next; - Next = Stream->Timesteps; - while (Next) - { - CP_verbose(Stream, TraceVerbose, - "Examining metadata for Timestep %d\n", - Next->MetadataMsg->Timestep); - 
if (((Next->MetadataMsg->Metadata == NULL) || - (Next->MetadataMsg->Timestep < - Stream->DiscardPriorTimestep)) && - (FoundTS == NULL)) - { - /* - * Either this is a dummy timestep for something that - * was discarded on the writer side, or it is a - * timestep that satisfies DiscardPriorTimestep and - * we've already sent a release for it. Now is the - * time to install the 'precious' info that it carried - * (Attributes and formats) and then discard it. - */ - CP_verbose(Stream, PerRankVerbose, - "SstAdvanceStep installing precious " - "metadata for discarded TS %d\n", - Next->MetadataMsg->Timestep); - FFSMarshalInstallPreciousMetadata(Stream, Next->MetadataMsg); - TSMetadataList Tmp = Next; - Next = Next->Next; - FreeTimestep(Stream, Tmp->MetadataMsg->Timestep); - continue; - } - if (Next->MetadataMsg->Timestep >= LastTimestep) - { - if ((FoundTS == NULL) && - (Next->MetadataMsg->Timestep > LastTimestep)) - { - FoundTS = Next; - break; - } - else if ((FoundTS != NULL) && (FoundTS->MetadataMsg->Timestep > - Next->MetadataMsg->Timestep)) - { - FoundTS = Next; - break; - } - } - Next = Next->Next; - } - if (FoundTS) - { - CP_verbose(Stream, PerRankVerbose, - "Returning metadata for Timestep %d\n", - FoundTS->MetadataMsg->Timestep); - Stream->CurrentWorkingTimestep = FoundTS->MetadataMsg->Timestep; - return FoundTS; - } - /* didn't find a good next timestep, check Stream status */ - if ((Stream->Status != Established) || - ((Stream->FinalTimestep != INT_MAX) && - (Stream->FinalTimestep >= LastTimestep))) - { - CP_verbose(Stream, TraceVerbose, - "Stream Final Timestep is %d, last timestep was %d\n", - Stream->FinalTimestep, LastTimestep); - if (Stream->Status == NotOpen) - { - CP_verbose(Stream, PerRankVerbose, - "Wait for next metadata returning NULL because " - "channel was never fully established\n"); - } - else if (Stream->Status == PeerFailed) - { - CP_verbose(Stream, PerRankVerbose, - "Wait for next metadata returning NULL because " - "the connection failed 
before final timestep " - "notification\n"); - } - else - { - CP_verbose(Stream, PerStepVerbose, - "Wait for next metadata returning NULL, status %d ", - Stream->Status); - } - /* closed or failed, return NULL */ - Stream->CurrentWorkingTimestep = -1; - return NULL; - } - CP_verbose(Stream, PerRankVerbose, - "Waiting for metadata for a Timestep later than TS %d\n", - LastTimestep); - CP_verbose(Stream, TraceVerbose, - "(PID %lx, TID %lx) Stream status is %s\n", (long)getpid(), - (long)gettid(), SSTStreamStatusStr[Stream->Status]); - /* wait until we get the timestep metadata or something else changes */ - STREAM_CONDITION_WAIT(Stream); - } - /* NOTREACHED */ -} - -// SstGetCurMetadata is an SST entry point only called by the main -// program thread. Only accesses the CurrentMetadata field which is -// touched only by other subroutines called by the main program -// thread, it needs no locking. -extern SstFullMetadata SstGetCurMetadata(SstStream Stream) -{ - return Stream->CurrentMetadata; -} - -static void AddToReadStats(SstStream Stream, int Rank, long Timestep, - size_t Length) -{ - if (!Stream->RanksRead) - Stream->RanksRead = calloc(1, Stream->WriterCohortSize); - Stream->RanksRead[Rank] = 1; - Stream->Stats.BytesRead += Length; -} - -#ifndef min -#define min(a, b) (((a) < (b)) ? (a) : (b)) -#endif - -static void ReleaseTSReadStats(SstStream Stream, long Timestep) -{ - int ThisFanIn = 0; - if (Stream->RanksRead) - { - for (int i = 0; i < Stream->WriterCohortSize; i++) - { - if (Stream->RanksRead[i]) - ThisFanIn++; - } - memset(Stream->RanksRead, 0, Stream->WriterCohortSize); - } - if (Stream->Stats.TimestepsConsumed == 1) - { - Stream->Stats.RunningFanIn = ThisFanIn; - } - else - { - Stream->Stats.RunningFanIn = - Stream->Stats.RunningFanIn + - ((double)ThisFanIn - Stream->Stats.RunningFanIn) / - min(Stream->Stats.TimestepsConsumed, 100); - } -} - -// SstReadRemotememory is only called by the main -// program thread. 
-extern void *SstReadRemoteMemory(SstStream Stream, int Rank, long Timestep, - size_t Offset, size_t Length, void *Buffer, - void *DP_TimestepInfo) -{ - if (Stream->ConfigParams->ReaderShortCircuitReads) - return NULL; - Stream->Stats.BytesTransferred += Length; - AddToReadStats(Stream, Rank, Timestep, Length); - return Stream->DP_Interface->readRemoteMemory( - &Svcs, Stream->DP_Stream, Rank, Timestep, Offset, Length, Buffer, - DP_TimestepInfo); -} - -static void sendOneToEachWriterRank(SstStream Stream, CMFormat f, void *Msg, - void **WS_StreamPtr) -{ - if (Stream->WriterConfigParams->CPCommPattern == SstCPCommPeer) - { - int i = 0; - while (Stream->Peers[i] != -1) - { - int peer = Stream->Peers[i]; - CMConnection conn = Stream->ConnectionsToWriter[peer].CMconn; - /* add the writer Stream identifier to each outgoing - * message */ - *WS_StreamPtr = Stream->ConnectionsToWriter[peer].RemoteStreamID; - if (CMwrite(conn, f, Msg) != 1) - { - switch (Stream->Status) - { - case NotOpen: - case Opening: - case Established: - CP_verbose(Stream, CriticalVerbose, - "Message failed to send to writer %d (%p)\n", - peer, *WS_StreamPtr); - break; - case PeerClosed: - case PeerFailed: - case Closed: - case Destroyed: - // Don't warn on send failures for closing/closed clients - break; - } - } - i++; - } - } - else - { - if (Stream->Rank == 0) - { - int peer = 0; - CMConnection conn = Stream->ConnectionsToWriter[peer].CMconn; - /* add the writer Stream identifier to each outgoing - * message */ - *WS_StreamPtr = Stream->ConnectionsToWriter[peer].RemoteStreamID; - if (CMwrite(conn, f, Msg) != 1) - { - switch (Stream->Status) - { - case NotOpen: - case Opening: - case Established: - CP_verbose(Stream, CriticalVerbose, - "Message failed to send to writer %d (%p)\n", - peer, *WS_StreamPtr); - break; - case PeerClosed: - case PeerFailed: - case Closed: - case Destroyed: - // Don't warn on send failures for closing/closed clients - break; - } - } - } - } -} - -// SstReaderDefinitionLock 
is only called by the main -// program thread. -extern void SstReaderDefinitionLock(SstStream Stream, long EffectiveTimestep) -{ - struct _LockReaderDefinitionsMsg Msg; - - memset(&Msg, 0, sizeof(Msg)); - Msg.Timestep = EffectiveTimestep; - - sendOneToEachWriterRank( - Stream, Stream->CPInfo->SharedCM->LockReaderDefinitionsFormat, &Msg, - &Msg.WSR_Stream); -} - -// SstReleaseStep is only called by the main program thread. It -// locks to protect the timestep list before freeing the local -// representation of the resleased timestep. -extern void SstReleaseStep(SstStream Stream) -{ - long Timestep = Stream->ReaderTimestep; - struct _ReleaseTimestepMsg Msg; - - TAU_START_FUNC(); - STREAM_MUTEX_LOCK(Stream); - if (Stream->DP_Interface->RSReleaseTimestep) - { - (Stream->DP_Interface->RSReleaseTimestep)(&Svcs, Stream->DP_Stream, - Timestep); - } - ReleaseTSReadStats(Stream, Timestep); - STREAM_MUTEX_UNLOCK(Stream); - - if ((Stream->WriterConfigParams->CPCommPattern == SstCPCommPeer) || - (Stream->Rank == 0)) - { - STREAM_MUTEX_LOCK(Stream); - FreeTimestep(Stream, Timestep); - STREAM_MUTEX_UNLOCK(Stream); - } - - SMPI_Barrier(Stream->mpiComm); - - memset(&Msg, 0, sizeof(Msg)); - Msg.Timestep = Timestep; - - /* - * send each writer rank a release for this timestep (actually goes to WSR - * Streams) - */ - CP_verbose( - Stream, PerRankVerbose, - "Sending ReleaseTimestep message for timestep %d, one to each writer\n", - Timestep); - sendOneToEachWriterRank(Stream, - Stream->CPInfo->SharedCM->ReleaseTimestepFormat, - &Msg, &Msg.WSR_Stream); - - if (Stream->WriterConfigParams->MarshalMethod == SstMarshalFFS) - { - FFSClearTimestepData(Stream); - } - TAU_STOP_FUNC(); -} - -static void NotifyDPArrivedMetadata(SstStream Stream, - struct _TimestepMetadataMsg *MetadataMsg) -{ - if ((MetadataMsg->Metadata != NULL) && - (MetadataMsg->Timestep > Stream->LastDPNotifiedTimestep)) - { - if (Stream->DP_Interface->timestepArrived) - { - Stream->DP_Interface->timestepArrived(&Svcs, 
Stream->DP_Stream, - MetadataMsg->Timestep, - MetadataMsg->PreloadMode); - } - Stream->LastDPNotifiedTimestep = MetadataMsg->Timestep; - } -} - -/* - * wait for metadata for Timestep indicated to arrive, or fail with EndOfStream - * or Error - */ -static SstStatusValue SstAdvanceStepPeer(SstStream Stream, SstStepMode mode, - const float timeout_sec) -{ - - TSMetadataList Entry; - - TAU_START("Waiting on metadata per rank per timestep"); - - if ((timeout_sec >= 0.0) || (mode == SstLatestAvailable)) - { - struct _GlobalOpInfo - { - float timeout_sec; - int mode; - long LatestTimestep; - }; - struct _GlobalOpInfo my_info; - struct _GlobalOpInfo *global_info = NULL; - long NextTimestep; - - if (Stream->Rank == 0) - { - global_info = malloc(sizeof(my_info) * Stream->CohortSize); - CP_verbose(Stream, PerRankVerbose, - "In special case of advancestep, mode is %d, " - "Timeout Sec is %g, flt_max is %g\n", - mode, timeout_sec, FLT_MAX); - } - my_info.LatestTimestep = MaxQueuedMetadata(Stream); - my_info.timeout_sec = timeout_sec; - my_info.mode = mode; - SMPI_Gather(&my_info, sizeof(my_info), SMPI_CHAR, global_info, - sizeof(my_info), SMPI_CHAR, 0, Stream->mpiComm); - if (Stream->Rank == 0) - { - long Biggest = -1; - long Smallest = LONG_MAX; - for (int i = 0; i < Stream->CohortSize; i++) - { - if (global_info[i].LatestTimestep > Biggest) - { - Biggest = global_info[i].LatestTimestep; - } - if (global_info[i].LatestTimestep < Smallest) - { - Smallest = global_info[i].LatestTimestep; - } - } - - free(global_info); - - /* - * Several situations are possible here, depending upon - * whether or not a timeout is specified and/or - * LatestAvailable is specified, and whether or not we - * have timesteps queued anywhere. If they want - * LatestAvailable and we have any Timesteps queued - * anywhere, we decide upon a timestep to return and - * assume that all ranks will get it soon (or else we're - * in failure mode). 
If there are no timesteps queued - * anywhere, then we're going to wait for timeout seconds - * ON RANK 0. RANK 0 AND ONLY RANK 0 WILL DECIDE IF WE - * TIMEOUT OR RETURN WITH DATA. It is possible that other - * ranks get timestep metadata before the timeout expires, - * but we don't care. Whatever would happen on rank 0 is - * what happens everywhere. - */ - - if (Biggest == -1) - { - // AllQueuesEmpty - if (timeout_sec >= 0.0) - { - waitForMetadataWithTimeout(Stream, timeout_sec); - } - else - { - waitForMetadataWithTimeout(Stream, FLT_MAX); - } - NextTimestep = - MaxQueuedMetadata(Stream); /* might be -1 if we timed out */ - } - else - { - /* - * we've actually got a choice here. "Smallest" is - * the LatestTimestep that everyone has. "Biggest" is - * the Latest that someone has seen, and presumably - * others will see shortly. I'm going to go with Biggest - * until I have a reason to prefer one or the other. - */ - if (mode == SstLatestAvailable) - { - // latest available - CP_verbose(Stream, PerRankVerbose, - "Returning Biggest timestep available " - "%ld because LatestAvailable " - "specified\n", - Biggest); - NextTimestep = Biggest; - } - else - { - // next available (take the oldest that everyone has) - CP_verbose(Stream, PerRankVerbose, - "Returning Smallest timestep available " - "%ld because NextAvailable specified\n", - Smallest); - NextTimestep = Smallest; - } - } - if ((NextTimestep == -1) && (Stream->Status == PeerClosed)) - { - /* force everyone to close */ - NextTimestep = -2; - } - if ((NextTimestep == -1) && (Stream->Status == PeerFailed)) - { - /* force everyone to return failed */ - NextTimestep = -3; - } - SMPI_Bcast(&NextTimestep, 1, SMPI_LONG, 0, Stream->mpiComm); - } - else - { - STREAM_MUTEX_UNLOCK(Stream); - SMPI_Bcast(&NextTimestep, 1, SMPI_LONG, 0, Stream->mpiComm); - STREAM_MUTEX_LOCK(Stream); - } - if (NextTimestep == -2) - { - /* there was a peerClosed setting on rank0, we'll close */ - Stream->Status = PeerClosed; - 
CP_verbose(Stream, PerStepVerbose, - "SstAdvanceStep returning EndOfStream at timestep %d\n", - Stream->ReaderTimestep); - return SstEndOfStream; - } - if (NextTimestep == -3) - { - /* there was a peerFailed setting on rank0, we'll fail */ - Stream->Status = PeerFailed; - CP_verbose(Stream, PerStepVerbose, - "SstAdvanceStep returning EndOfStream at timestep %d\n", - Stream->ReaderTimestep); - STREAM_MUTEX_UNLOCK(Stream); - Stream->DP_Interface->notifyConnFailure(&Svcs, Stream->DP_Stream, - 0); - STREAM_MUTEX_LOCK(Stream); - return SstFatalError; - } - if (NextTimestep == -1) - { - CP_verbose(Stream, PerStepVerbose, - "AdvancestepPeer timing out on no data\n"); - return SstTimeout; - } - if (mode == SstLatestAvailable) - { - // latest available - /* release all timesteps from before NextTimestep, then fall - * through below */ - /* Side note: It is possible that someone could get a "prior" - * timestep after this point. It has to be released upon - * arrival */ - CP_verbose(Stream, PerStepVerbose, - "timed or Latest timestep, determined NextTimestep %d\n", - NextTimestep); - Stream->DiscardPriorTimestep = NextTimestep; - releasePriorTimesteps(Stream, NextTimestep); - } - } - - Entry = waitForNextMetadata(Stream, Stream->ReaderTimestep); - - TAU_STOP("Waiting on metadata per rank per timestep"); - - if (Entry) - { - NotifyDPArrivedMetadata(Stream, Entry->MetadataMsg); - - if (Stream->WriterConfigParams->MarshalMethod == SstMarshalFFS) - { - TAU_START("FFS marshaling case"); - FFSMarshalInstallMetadata(Stream, Entry->MetadataMsg); - TAU_STOP("FFS marshaling case"); - } - Stream->ReaderTimestep = Entry->MetadataMsg->Timestep; - SstFullMetadata Mdata = malloc(sizeof(struct _SstFullMetadata)); - memset(Mdata, 0, sizeof(struct _SstFullMetadata)); - Mdata->WriterCohortSize = Entry->MetadataMsg->CohortSize; - Mdata->WriterMetadata = - malloc(sizeof(Mdata->WriterMetadata[0]) * Mdata->WriterCohortSize); - for (int i = 0; i < Mdata->WriterCohortSize; i++) - { - 
Mdata->WriterMetadata[i] = &Entry->MetadataMsg->Metadata[i]; - } - if (Stream->DP_Interface->TimestepInfoFormats == NULL) - { - // DP didn't provide struct info, no valid data - Mdata->DP_TimestepInfo = NULL; - } - else - { - Mdata->DP_TimestepInfo = Entry->MetadataMsg->DP_TimestepInfo; - } - Stream->CurrentWorkingTimestep = Entry->MetadataMsg->Timestep; - Stream->CurrentMetadata = Mdata; - - CP_verbose(Stream, PerStepVerbose, - "SstAdvanceStep returning Success on timestep %d\n", - Entry->MetadataMsg->Timestep); - return SstSuccess; - } - if (Stream->Status == PeerClosed) - { - CP_verbose(Stream, PerStepVerbose, - "SstAdvanceStepPeer returning EndOfStream at timestep %d\n", - Stream->ReaderTimestep); - return SstEndOfStream; - } - else - { - CP_verbose(Stream, PerStepVerbose, - "SstAdvanceStep returning FatalError at timestep %d\n", - Stream->ReaderTimestep); - return SstFatalError; - } -} - -static SstStatusValue SstAdvanceStepMin(SstStream Stream, SstStepMode mode, - const float timeout_sec) -{ - TSMetadataDistributionMsg ReturnData; - struct _TimestepMetadataMsg *MetadataMsg; - SstStatusValue ret; - - void *free_block; - - if (Stream->Rank == 0) - { - struct _TimestepMetadataDistributionMsg msg; - SstStatusValue return_value = SstSuccess; - TSMetadataList RootEntry = NULL; - - memset(&msg, 0, sizeof(msg)); - msg.TSmsg = NULL; - msg.CommPatternLockedTimestep = -1; - if (Stream->CommPatternLocked == 1) - { - msg.CommPatternLockedTimestep = Stream->CommPatternLockedTimestep; - } - if ((timeout_sec >= 0.0) || (mode == SstLatestAvailable)) - { - long NextTimestep = -1; - long LatestTimestep = MaxQueuedMetadata(Stream); - /* - * Several situations are possible here, depending upon - * whether or not a timeout is specified and/or - * LatestAvailable is specified, and whether or not we - * have timesteps queued anywhere. 
If they want - * LatestAvailable and we have any Timesteps queued - * anywhere, we decide upon a timestep to return and - * assume that all ranks will get it soon (or else we're - * in failure mode). If there are no timesteps queued - * anywhere, then we're going to wait for timeout seconds - * ON RANK 0. RANK 0 AND ONLY RANK 0 WILL DECIDE IF WE - * TIMEOUT OR RETURN WITH DATA. It is possible that other - * ranks get timestep metadata before the timeout expires, - * but we don't care. Whatever would happen on rank 0 is - * what happens everywhere. - */ - - if (LatestTimestep == -1) - { - // AllQueuesEmpty - if (timeout_sec >= 0.0) - { - waitForMetadataWithTimeout(Stream, timeout_sec); - } - else - { - waitForMetadataWithTimeout(Stream, FLT_MAX); - } - NextTimestep = - MaxQueuedMetadata(Stream); /* might be -1 if we timed out */ - } - else - { - if (mode == SstLatestAvailable) - { - // latest available - CP_verbose(Stream, PerStepVerbose, - "Returning latest timestep available " - "%ld because LatestAvailable " - "specified\n", - LatestTimestep); - NextTimestep = LatestTimestep; - } - else - { - // next available (take the oldest that everyone has) - NextTimestep = NextQueuedMetadata(Stream); - CP_verbose(Stream, PerStepVerbose, - "Returning Smallest timestep available " - "%ld because NextAvailable specified\n", - NextTimestep); - } - } - if (Stream->Status == PeerFailed) - { - CP_verbose(Stream, PerStepVerbose, - "SstAdvanceStepMin returning FatalError because of " - "connection failure at timestep %d\n", - Stream->ReaderTimestep); - return_value = SstFatalError; - } - else if ((NextTimestep == -1) && (Stream->Status == PeerClosed)) - { - CP_verbose( - Stream, PerStepVerbose, - "SstAdvanceStepMin returning EndOfStream at timestep %d\n", - Stream->ReaderTimestep); - return_value = SstEndOfStream; - } - else if (NextTimestep == -1) - { - CP_verbose(Stream, PerStepVerbose, - "AdvancestepMin timing out on no data\n"); - return_value = SstTimeout; - } - else if (mode 
== SstLatestAvailable) - { - // latest available - /* release all timesteps from before NextTimestep, then fall - * through below */ - /* Side note: It is possible that someone could get a "prior" - * timestep after this point. It has to be released upon - * arrival */ - CP_verbose( - Stream, PerStepVerbose, - "timed or Latest timestep, determined NextTimestep %d\n", - NextTimestep); - Stream->DiscardPriorTimestep = NextTimestep; - releasePriorTimesteps(Stream, NextTimestep); - } - } - if (Stream->Status == PeerFailed) - { - CP_verbose(Stream, PerStepVerbose, - "SstAdvanceStepMin returning FatalError because of " - "conn failure at timestep %d\n", - Stream->ReaderTimestep); - return_value = SstFatalError; - } - if (return_value == SstSuccess) - { - RootEntry = waitForNextMetadata(Stream, Stream->ReaderTimestep); - } - if (RootEntry) - { - msg.TSmsg = RootEntry->MetadataMsg; - msg.ReturnValue = return_value; - CP_verbose(Stream, TraceVerbose, - "Setting TSmsg to Rootentry value\n"); - } - else - { - if (return_value == SstSuccess) - { - if (Stream->Status == PeerClosed) - { - CP_verbose(Stream, PerStepVerbose, - "SstAdvanceStepMin rank 0 returning " - "EndOfStream at timestep %d\n", - Stream->ReaderTimestep); - msg.ReturnValue = SstEndOfStream; - } - else - { - CP_verbose(Stream, PerStepVerbose, - "SstAdvanceStepMin rank 0 returning " - "FatalError at timestep %d\n", - Stream->ReaderTimestep); - msg.ReturnValue = SstFatalError; - } - CP_verbose(Stream, TraceVerbose, "Setting TSmsg to NULL\n"); - msg.TSmsg = NULL; - } - else - { - msg.ReturnValue = return_value; - } - } - // AddArrivedMetadataInfo(Stream, &msg); - ReturnData = CP_distributeDataFromRankZero( - Stream, &msg, Stream->CPInfo->TimestepDistributionFormat, - &free_block); - } - else - { - - STREAM_MUTEX_UNLOCK(Stream); - ReturnData = CP_distributeDataFromRankZero( - Stream, NULL, Stream->CPInfo->CombinedWriterInfoFormat, - &free_block); - STREAM_MUTEX_LOCK(Stream); - } - ret = 
(SstStatusValue)ReturnData->ReturnValue; - - if (ReturnData->ReturnValue != SstSuccess) - { - if ((Stream->WriterConfigParams->MarshalMethod == SstMarshalFFS) && - (ReturnData->TSmsg)) - { - CP_verbose( - Stream, PerRankVerbose, - "SstAdvanceStep installing precious metadata before exiting\n"); - FFSMarshalInstallPreciousMetadata(Stream, ReturnData->TSmsg); - } - - free(free_block); - CP_verbose(Stream, PerStepVerbose, - "SstAdvanceStep returning FAILURE\n"); - return ret; - } - MetadataMsg = ReturnData->TSmsg; - - if (ReturnData->CommPatternLockedTimestep != -1) - { - Stream->CommPatternLockedTimestep = - ReturnData->CommPatternLockedTimestep; - Stream->CommPatternLocked = 2; - STREAM_MUTEX_UNLOCK(Stream); - if (Stream->DP_Interface->RSreadPatternLocked) - { - Stream->DP_Interface->RSreadPatternLocked( - &Svcs, Stream->DP_Stream, Stream->CommPatternLockedTimestep); - } - STREAM_MUTEX_LOCK(Stream); - } - if (MetadataMsg) - { - NotifyDPArrivedMetadata(Stream, MetadataMsg); - - Stream->ReaderTimestep = MetadataMsg->Timestep; - if (Stream->WriterConfigParams->MarshalMethod == SstMarshalFFS) - { - CP_verbose(Stream, TraceVerbose, - "Calling install metadata from metadata block %p\n", - MetadataMsg); - FFSMarshalInstallMetadata(Stream, MetadataMsg); - } - SstFullMetadata Mdata = malloc(sizeof(struct _SstFullMetadata)); - memset(Mdata, 0, sizeof(struct _SstFullMetadata)); - Mdata->WriterCohortSize = MetadataMsg->CohortSize; - Mdata->WriterMetadata = - malloc(sizeof(Mdata->WriterMetadata[0]) * Mdata->WriterCohortSize); - for (int i = 0; i < Mdata->WriterCohortSize; i++) - { - Mdata->WriterMetadata[i] = &MetadataMsg->Metadata[i]; - } - if (Stream->DP_Interface->TimestepInfoFormats == NULL) - { - // DP didn't provide struct info, no valid data - Mdata->DP_TimestepInfo = NULL; - } - else - { - Mdata->DP_TimestepInfo = MetadataMsg->DP_TimestepInfo; - } - Stream->CurrentWorkingTimestep = MetadataMsg->Timestep; - Mdata->FreeBlock = free_block; - Stream->CurrentMetadata = Mdata; 
- - CP_verbose(Stream, PerStepVerbose, - "SstAdvanceStep returning Success on timestep %d\n", - MetadataMsg->Timestep); - return SstSuccess; - } - CP_verbose(Stream, TraceVerbose, "SstAdvanceStep final return\n"); - return ret; -} - -// SstAdvanceStep is only called by the main program thread. -extern SstStatusValue SstAdvanceStep(SstStream Stream, const float timeout_sec) -{ - - SstStatusValue result; - STREAM_MUTEX_LOCK(Stream); - if (Stream->CurrentMetadata != NULL) - { - if (Stream->CurrentMetadata->FreeBlock) - { - free(Stream->CurrentMetadata->FreeBlock); - } - if (Stream->CurrentMetadata->WriterMetadata) - { - free(Stream->CurrentMetadata->WriterMetadata); - } - free(Stream->CurrentMetadata); - Stream->CurrentMetadata = NULL; - } - - SstStepMode mode = SstNextAvailable; - if (Stream->ConfigParams->AlwaysProvideLatestTimestep) - { - mode = SstLatestAvailable; - } - if (Stream->WriterConfigParams->CPCommPattern == SstCPCommPeer) - { - result = SstAdvanceStepPeer(Stream, mode, timeout_sec); - } - else - { - result = SstAdvanceStepMin(Stream, mode, timeout_sec); - } - if (result == SstSuccess) - { - Stream->Stats.TimestepsConsumed++; - } - STREAM_MUTEX_UNLOCK(Stream); - return result; -} - -// SstReaderClose is only called by the main program thread and -// needs no locking as it only accesses data set by the main thread -extern void SstReaderClose(SstStream Stream) -{ - /* need to have a reader-side shutdown protocol, but for now, just sleep for - * a little while to makes sure our release message for the last timestep - * got received */ - struct timeval CloseTime, Diff; - struct _ReaderCloseMsg Msg; - /* wait until each reader rank has done SstReaderClose() */ - SMPI_Barrier(Stream->mpiComm); - gettimeofday(&CloseTime, NULL); - timersub(&CloseTime, &Stream->ValidStartTime, &Diff); - memset(&Msg, 0, sizeof(Msg)); - sendOneToEachWriterRank(Stream, Stream->CPInfo->SharedCM->ReaderCloseFormat, - &Msg, &Msg.WSR_Stream); - Stream->Stats.StreamValidTimeSecs = - 
(double)Diff.tv_usec / 1e6 + Diff.tv_sec; - - if (Stream->CPVerbosityLevel >= (int)SummaryVerbose) - { - DoStreamSummary(Stream); - } - CMusleep(Stream->CPInfo->SharedCM->cm, 100000); - if (Stream->CurrentMetadata != NULL) - { - if (Stream->CurrentMetadata->FreeBlock) - free(Stream->CurrentMetadata->FreeBlock); - if (Stream->CurrentMetadata->WriterMetadata) - free(Stream->CurrentMetadata->WriterMetadata); - free(Stream->CurrentMetadata); - Stream->CurrentMetadata = NULL; - } -} - -// SstWaitForCompletion is only called by the main program thread and -// needs no locking -extern SstStatusValue SstWaitForCompletion(SstStream Stream, void *handle) -{ - if (Stream->ConfigParams->ReaderShortCircuitReads) - return SstSuccess; - if (Stream->DP_Interface->waitForCompletion(&Svcs, handle) != 1) - { - return SstFatalError; - } - else - { - return SstSuccess; - } -} +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "adios2/common/ADIOSConfig.h" +#include +#include +#include + +#include "sst.h" + +#include "adios2/toolkit/profiling/taustubs/taustubs.h" +#include "cp_internal.h" + +#define gettid() pthread_self() +#ifdef MUTEX_DEBUG +#define STREAM_MUTEX_LOCK(Stream) \ + { \ + fprintf(stderr, "(PID %lx, TID %lx) CP_READER Trying lock line %d\n", \ + (long)getpid(), (long)gettid(), __LINE__); \ + pthread_mutex_lock(&Stream->DataLock); \ + Stream->Locked++; \ + fprintf(stderr, "(PID %lx, TID %lx) CP_READER Got lock\n", \ + (long)getpid(), (long)gettid()); \ + } + +#define STREAM_MUTEX_UNLOCK(Stream) \ + { \ + fprintf(stderr, "(PID %lx, TID %lx) CP_READER UNlocking line %d\n", \ + (long)getpid(), (long)gettid(), __LINE__); \ + Stream->Locked--; \ + pthread_mutex_unlock(&Stream->DataLock); \ + } +#define STREAM_CONDITION_WAIT(Stream) \ + { \ + fprintf( \ + stderr, \ + "(PID %lx, TID %lx) CP_READER Dropping Condition Lock line %d\n", \ + (long)getpid(), (long)gettid(), __LINE__); \ + Stream->Locked = 0; \ + 
pthread_cond_wait(&Stream->DataCondition, &Stream->DataLock); \ + fprintf( \ + stderr, \ + "(PID %lx, TID %lx) CP_READER Acquired Condition Lock line %d\n", \ + (long)getpid(), (long)gettid(), __LINE__); \ + Stream->Locked = 1; \ + } +#define STREAM_CONDITION_SIGNAL(Stream) \ + { \ + assert(Stream->Locked == 1); \ + fprintf(stderr, \ + "(PID %lx, TID %lx) CP_READER Signalling Condition line %d\n", \ + (long)getpid(), (long)gettid(), __LINE__); \ + pthread_cond_signal(&Stream->DataCondition); \ + } + +#define STREAM_ASSERT_LOCKED(Stream) \ + { \ + assert(Stream->Locked == 1); \ + } +#else +#define STREAM_MUTEX_LOCK(Stream) \ + { \ + pthread_mutex_lock(&Stream->DataLock); \ + } +#define STREAM_MUTEX_UNLOCK(Stream) \ + { \ + pthread_mutex_unlock(&Stream->DataLock); \ + } +#define STREAM_CONDITION_WAIT(Stream) \ + { \ + pthread_cond_wait(&Stream->DataCondition, &Stream->DataLock); \ + } +#define STREAM_CONDITION_SIGNAL(Stream) \ + { \ + pthread_cond_signal(&Stream->DataCondition); \ + } +#define STREAM_ASSERT_LOCKED(Stream) +#endif + +static char *readContactInfoFile(const char *Name, SstStream Stream, + int Timeout) +{ + size_t len = strlen(Name) + strlen(SST_POSTFIX) + 1; + char *FileName = malloc(len); + int Badfile = 0; + int ZeroCount = 0; + FILE *WriterInfo; + int64_t TimeoutRemaining = Timeout * 1000 * 1000; + int64_t WaitWarningRemaining = 5 * 1000 * 1000; + long SleepInterval = 100000; + snprintf(FileName, len, "%s" SST_POSTFIX, Name); + CP_verbose(Stream, PerRankVerbose, + "Looking for writer contact in file %s, with timeout %d secs\n", + FileName, Timeout); +redo: + WriterInfo = fopen(FileName, "r"); + while (!WriterInfo) + { + // CMusleep(Stream->CPInfo->cm, SleepInterval); + usleep(SleepInterval); + TimeoutRemaining -= SleepInterval; + WaitWarningRemaining -= SleepInterval; + if (WaitWarningRemaining == 0) + { + fprintf(stderr, + "ADIOS2 SST Engine waiting for contact information " + "file %s to be created\n", + Name); + } + if (TimeoutRemaining <= 0) + { 
+ free(FileName); + return NULL; + } + WriterInfo = fopen(FileName, "r"); + } + struct stat Buf; + fstat(fileno(WriterInfo), &Buf); + int Size = Buf.st_size; + if (Size == 0) + { + // Try again, it might look zero momentarily, but shouldn't stay that + // way. + ZeroCount++; + if (ZeroCount < 5) + { + // We'll give it several attempts (and some time) to go non-zero + usleep(SleepInterval); + goto redo; + } + } + + if (Size < strlen(SSTMAGICV0)) + { + Badfile++; + } + else + { + char Tmp[strlen(SSTMAGICV0)]; + if (fread(Tmp, strlen(SSTMAGICV0), 1, WriterInfo) != 1) + { + fprintf(stderr, + "Filesystem read failed in SST Open, failing operation\n"); + fclose(WriterInfo); + Badfile++; + } + Size -= strlen(SSTMAGICV0); + if (strncmp(Tmp, SSTMAGICV0, strlen(SSTMAGICV0)) != 0) + { + Badfile++; + } + } + if (Badfile) + { + fprintf(stderr, + "!!! File %s is not an ADIOS2 SST Engine Contact file\n", + FileName); + free(FileName); + fclose(WriterInfo); + return NULL; + } + free(FileName); + char *Buffer = calloc(1, Size + 1); + if (fread(Buffer, Size, 1, WriterInfo) != 1) + { + fprintf(stderr, + "Filesystem read failed in SST Open, failing operation\n"); + free(Buffer); + fclose(WriterInfo); + return NULL; + } + fclose(WriterInfo); + return Buffer; +} + +static char *readContactInfoScreen(const char *Name, SstStream Stream) +{ + char Input[10240]; + char *Skip = Input; + fprintf(stdout, + "Please enter the contact information associated with SST " + "input stream \"%s\":\n", + Name); + if (fgets(Input, sizeof(Input), stdin) == NULL) + { + fprintf(stdout, "Read from stdin failed, exiting\n"); + exit(1); + } + while (isspace(*Skip)) + Skip++; + return strdup(Skip); +} + +static char *readContactInfo(const char *Name, SstStream Stream, int Timeout) +{ + switch (Stream->RegistrationMethod) + { + case SstRegisterFile: + return readContactInfoFile(Name, Stream, Timeout); + case SstRegisterScreen: + return readContactInfoScreen(Name, Stream); + case SstRegisterCloud: + /* not yet */ 
+ return NULL; + } + return NULL; +} + +// ReaderConnCloseHandler is called by the network handler thread in +// response to the failure of a network connection to the writer. +extern void ReaderConnCloseHandler(CManager cm, CMConnection ClosedConn, + void *client_data) +{ + TAU_START_FUNC(); + SstStream Stream = (SstStream)client_data; + int FailedPeerRank = -1; + STREAM_MUTEX_LOCK(Stream); + CP_verbose(Stream, PerRankVerbose, "Reader-side close handler invoked\n"); + if ((Stream->Status == Destroyed) || (!Stream->ConnectionsToWriter)) + { + STREAM_MUTEX_UNLOCK(Stream); + return; + } + for (int i = 0; i < Stream->WriterCohortSize; i++) + { + if (Stream->ConnectionsToWriter[i].CMconn == ClosedConn) + { + FailedPeerRank = i; + } + } + + if (Stream->Status == Established) + { + if ((Stream->WriterConfigParams->CPCommPattern == SstCPCommMin) && + (Stream->Rank != 0)) + { + CP_verbose(Stream, PerRankVerbose, + "Reader-side Rank received a " + "connection-close event during normal " + "operations, but might be part of shutdown " + "Don't change stream status.\n"); + /* if this happens and *is* a failure, we'll get the status from + * rank 0 later */ + } + else + { + /* + * tag our reader instance as failed, IFF this came from someone we + * should have gotten a CLOSE from. I.E. a reverse peer + */ + CP_verbose(Stream, PerRankVerbose, + "Reader-side Rank received a " + "connection-close event during normal " + "operations, peer likely failed\n"); + if (FailedPeerRank == Stream->FailureContactRank) + { + Stream->Status = PeerFailed; + STREAM_CONDITION_SIGNAL(Stream); + } + } + CP_verbose( + Stream, PerRankVerbose, + "The close was for connection to writer peer %d, notifying DP\n", + FailedPeerRank); + STREAM_MUTEX_UNLOCK(Stream); + /* notify DP of failure. 
This should terminate any waits currently + * pending in the DP for that rank */ + Stream->DP_Interface->notifyConnFailure(&Svcs, Stream->DP_Stream, + FailedPeerRank); + } + else if (Stream->Status == PeerClosed) + { + /* ignore this. We expect a close after the connection is marked closed + */ + CP_verbose(Stream, PerRankVerbose, + "Reader-side Rank received a " + "connection-close event after close, " + "not unexpected\n"); + STREAM_MUTEX_UNLOCK(Stream); + // Don't notify DP, because this is part of normal shutdown and we don't + // want to kill pending reads + } + else if (Stream->Status == PeerFailed) + { + CP_verbose( + Stream, PerRankVerbose, + "Reader-side Rank received a " + "connection-close event after PeerFailed, already notified DP \n"); + // Don't notify DP, because we already have */ + STREAM_MUTEX_UNLOCK(Stream); + } + else + { + CP_verbose(Stream, CriticalVerbose, + "Got an unexpected connection close event\n"); + CP_verbose(Stream, PerStepVerbose, + "Reader-side Rank received a " + "connection-close event in unexpected " + "status %s\n", + SSTStreamStatusStr[Stream->Status]); + STREAM_MUTEX_UNLOCK(Stream); + } + TAU_STOP_FUNC(); +} + +// SstCurrentStep is only called by the main program thread and +// needs no locking as it only accesses data set by the main thread +extern long SstCurrentStep(SstStream Stream) { return Stream->ReaderTimestep; } + +static void releasePriorTimesteps(SstStream Stream, long Latest); +static void sendOneToEachWriterRank(SstStream s, CMFormat f, void *Msg, + void **WS_StreamPtr); + +static void **ParticipateInReaderInitDataExchange(SstStream Stream, + void *dpInfo, + void **ret_data_block) +{ + + struct _CP_DP_PairInfo combined_init; + struct _CP_ReaderInitInfo cpInfo; + + struct _CP_DP_PairInfo **pointers; + + cpInfo.ContactInfo = CP_GetContactString(Stream, NULL); + cpInfo.ReaderID = Stream; + + combined_init.CP_Info = (void **)&cpInfo; + combined_init.DP_Info = dpInfo; + + pointers = (struct _CP_DP_PairInfo 
**)CP_consolidateDataToRankZero( + Stream, &combined_init, Stream->CPInfo->PerRankReaderInfoFormat, + ret_data_block); + free(cpInfo.ContactInfo); + return (void **)pointers; +} + +static int HasAllPeers(SstStream Stream) +{ + int i, StillWaiting = 0; + if (!Stream->ConnectionsToWriter) + { + CP_verbose(Stream, PerRankVerbose, + "(PID %lx, TID %lx) Waiting for first Peer notification\n", + (long)gettid(), (long)getpid()); + return 0; + } + i = 0; + while (Stream->Peers[i] != -1) + { + int peer = Stream->Peers[i]; + if (Stream->ConnectionsToWriter[peer].CMconn == NULL) + StillWaiting++; + i++; + } + if (StillWaiting == 0) + { + CP_verbose(Stream, PerRankVerbose, + "Rank %d has all forward peer connections\n", Stream->Rank); + return 1; + } + else + { + CP_verbose(Stream, PerRankVerbose, + "Rank %d waiting for %d forward peer connections\n", + Stream->Rank, StillWaiting); + return 0; + } +} + +attr_list ContactWriter(SstStream Stream, char *Filename, SstParams Params, + SMPI_Comm comm, CMConnection *conn_p, + void **WriterFileID_p) +{ + int DataSize = 0; + attr_list RetVal = NULL; + + if (Stream->Rank == 0) + { + char *Writer0Contact = + readContactInfo(Filename, Stream, Params->OpenTimeoutSecs); + char *CMContactString = NULL; + CMConnection conn = NULL; + attr_list WriterRank0Contact; + + if (Writer0Contact) + { + + CMContactString = + malloc(strlen(Writer0Contact)); /* at least long enough */ + sscanf(Writer0Contact, "%p:%s", WriterFileID_p, CMContactString); + // printf("Writer contact info is fileID %p, contact info + // %s\n", + // WriterFileID, CMContactString); + free(Writer0Contact); + + if (globalNetinfoCallback) + { + (globalNetinfoCallback)(1, CP_GetContactString(Stream, NULL), + IPDiagString); + (globalNetinfoCallback)(2, CMContactString, NULL); + } + WriterRank0Contact = attr_list_from_string(CMContactString); + conn = CMget_conn(Stream->CPInfo->SharedCM->cm, WriterRank0Contact); + free_attr_list(WriterRank0Contact); + } + if (conn) + { + DataSize = 
strlen(CMContactString) + 1; + *conn_p = conn; + } + else + { + DataSize = 0; + *conn_p = NULL; + } + SMPI_Bcast(&DataSize, 1, SMPI_INT, 0, Stream->mpiComm); + if (DataSize != 0) + { + SMPI_Bcast(CMContactString, DataSize, SMPI_CHAR, 0, + Stream->mpiComm); + RetVal = attr_list_from_string(CMContactString); + } + if (CMContactString) + free(CMContactString); + } + else + { + SMPI_Bcast(&DataSize, 1, SMPI_INT, 0, Stream->mpiComm); + if (DataSize != 0) + { + char *Buffer = malloc(DataSize); + SMPI_Bcast(Buffer, DataSize, SMPI_CHAR, 0, Stream->mpiComm); + RetVal = attr_list_from_string(Buffer); + free(Buffer); + } + } + return RetVal; +} + +// SstReaderOpen is an SST reader entry point, called only by the +// main program thread It must be called by all ranks, and as it +// creates the only shared data structure, no locking is necessary +// prior to the CMCondition_wait() that is triggered in response to +// reader regsitration. +SstStream SstReaderOpen(const char *Name, SstParams Params, SMPI_Comm comm) +{ + SstStream Stream; + void *dpInfo; + struct _CP_DP_PairInfo **pointers; + void *data_block; + void *free_block; + writer_data_t ReturnData; + struct _ReaderActivateMsg Msg; + struct timeval Start, Stop, Diff; + char *Filename = strdup(Name); + CMConnection rank0_to_rank0_conn = NULL; + void *WriterFileID; + + Stream = CP_newStream(); + Stream->Role = ReaderRole; + Stream->mpiComm = comm; + Stream->AttrsRetrieved = 0; + + SMPI_Comm_rank(Stream->mpiComm, &Stream->Rank); + SMPI_Comm_size(Stream->mpiComm, &Stream->CohortSize); + + CP_validateParams(Stream, Params, 0 /* reader */); + Stream->ConfigParams = Params; + + Stream->DP_Interface = + SelectDP(&Svcs, Stream, Stream->ConfigParams, Stream->Rank); + + Stream->CPInfo = + CP_getCPInfo(Stream->DP_Interface, Stream->ConfigParams->ControlModule); + + Stream->FinalTimestep = INT_MAX; /* set this on close */ + Stream->LastDPNotifiedTimestep = -1; + + gettimeofday(&Start, NULL); + + attr_list WriterContactAttributes = 
ContactWriter( + Stream, Filename, Params, comm, &rank0_to_rank0_conn, &WriterFileID); + + if (WriterContactAttributes == NULL) + { + SstStreamDestroy(Stream); + free(Stream); + free(Filename); + return NULL; + } + + Stream->DP_Stream = Stream->DP_Interface->initReader( + &Svcs, Stream, &dpInfo, Stream->ConfigParams, WriterContactAttributes, + &Stream->Stats); + + free_attr_list(WriterContactAttributes); + + pointers = (struct _CP_DP_PairInfo **)ParticipateInReaderInitDataExchange( + Stream, dpInfo, &data_block); + + if (Stream->Rank == 0) + { + struct _CombinedWriterInfo WriterData; + struct _ReaderRegisterMsg ReaderRegister; + + memset(&ReaderRegister, 0, sizeof(ReaderRegister)); + memset(&WriterData, 0, sizeof(WriterData)); + WriterData.WriterCohortSize = -1; + ReaderRegister.WriterFile = WriterFileID; + ReaderRegister.WriterResponseCondition = + CMCondition_get(Stream->CPInfo->SharedCM->cm, rank0_to_rank0_conn); + ReaderRegister.ReaderCohortSize = Stream->CohortSize; + switch (Stream->ConfigParams->SpeculativePreloadMode) + { + case SpecPreloadOff: + case SpecPreloadOn: + ReaderRegister.SpecPreload = + (SpeculativePreloadMode) + Stream->ConfigParams->SpeculativePreloadMode; + break; + case SpecPreloadAuto: + ReaderRegister.SpecPreload = SpecPreloadOff; + if (Stream->CohortSize <= + Stream->ConfigParams->SpecAutoNodeThreshold) + { + ReaderRegister.SpecPreload = SpecPreloadOn; + } + break; + } + + ReaderRegister.CP_ReaderInfo = + malloc(ReaderRegister.ReaderCohortSize * sizeof(void *)); + ReaderRegister.DP_ReaderInfo = + malloc(ReaderRegister.ReaderCohortSize * sizeof(void *)); + for (int i = 0; i < ReaderRegister.ReaderCohortSize; i++) + { + ReaderRegister.CP_ReaderInfo[i] = + (CP_ReaderInitInfo)pointers[i]->CP_Info; + ReaderRegister.DP_ReaderInfo[i] = pointers[i]->DP_Info; + } + free(pointers); + + /* the response value is set in the handler */ + volatile struct _WriterResponseMsg *response = NULL; + CMCondition_set_client_data(Stream->CPInfo->SharedCM->cm, + 
ReaderRegister.WriterResponseCondition, + &response); + + if (CMwrite(rank0_to_rank0_conn, + Stream->CPInfo->SharedCM->ReaderRegisterFormat, + &ReaderRegister) != 1) + { + CP_verbose(Stream, CriticalVerbose, + "Message failed to send to writer in SstReaderOpen\n"); + } + free(ReaderRegister.CP_ReaderInfo); + free(ReaderRegister.DP_ReaderInfo); + + /* wait for "go" from writer */ + CP_verbose( + Stream, PerRankVerbose, + "Waiting for writer response message in SstReadOpen(\"%s\")\n", + Filename, ReaderRegister.WriterResponseCondition); + CMCondition_wait(Stream->CPInfo->SharedCM->cm, + ReaderRegister.WriterResponseCondition); + CP_verbose(Stream, PerRankVerbose, + "finished wait writer response message in read_open\n"); + + if (response) + { + WriterData.WriterCohortSize = response->WriterCohortSize; + WriterData.WriterConfigParams = response->WriterConfigParams; + WriterData.StartingStepNumber = response->NextStepNumber; + WriterData.CP_WriterInfo = response->CP_WriterInfo; + WriterData.DP_WriterInfo = response->DP_WriterInfo; + } + ReturnData = CP_distributeDataFromRankZero( + Stream, &WriterData, Stream->CPInfo->CombinedWriterInfoFormat, + &free_block); + } + else + { + ReturnData = CP_distributeDataFromRankZero( + Stream, NULL, Stream->CPInfo->CombinedWriterInfoFormat, + &free_block); + } + + free(data_block); + + if (ReturnData->WriterCohortSize == -1) + { + /* Rank 0 found no writer at that contact point, fail the stream */ + free(free_block); + return NULL; + } + + if (Stream->Rank == 0) + { + CP_verbose(Stream, SummaryVerbose, + "Opening Reader Stream.\nWriter stream params are:\n"); + CP_dumpParams(Stream, ReturnData->WriterConfigParams, + 0 /* writer side */); + CP_verbose(Stream, SummaryVerbose, "Reader stream params are:\n"); + CP_dumpParams(Stream, Stream->ConfigParams, 1 /* reader side */); + } + + // printf("I am reader rank %d, my info on writers is:\n", Stream->Rank); + // FMdump_data(FMFormat_of_original(Stream->CPInfo->combined_writer_Format), + 
// ReturnData, 1024000); + // printf("\n"); + + Stream->WriterCohortSize = ReturnData->WriterCohortSize; + Stream->WriterConfigParams = ReturnData->WriterConfigParams; + if ((Stream->WriterConfigParams->MarshalMethod == SstMarshalFFS) && + (Stream->Rank == 0)) + { + CP_verbose(Stream, SummaryVerbose, + "Writer is doing FFS-based marshalling\n"); + } + if ((Stream->WriterConfigParams->MarshalMethod == SstMarshalBP) && + (Stream->Rank == 0)) + { + CP_verbose(Stream, SummaryVerbose, + "Writer is doing BP-based marshalling\n"); + } + if ((Stream->WriterConfigParams->CPCommPattern == SstCPCommMin) && + (Stream->Rank == 0)) + { + CP_verbose( + Stream, SummaryVerbose, + "Writer is using Minimum Connection Communication pattern (min)\n"); + } + if ((Stream->WriterConfigParams->CPCommPattern == SstCPCommPeer) && + (Stream->Rank == 0)) + { + CP_verbose(Stream, SummaryVerbose, + "Writer is using Peer-based Communication pattern (peer)\n"); + } + STREAM_MUTEX_LOCK(Stream); + Stream->ReaderTimestep = ReturnData->StartingStepNumber - 1; + + if (Stream->WriterConfigParams->CPCommPattern == SstCPCommPeer) + { + /* + * Wait for connections and messages from writer side peers + */ + getPeerArrays(Stream->CohortSize, Stream->Rank, + Stream->WriterCohortSize, &Stream->Peers, NULL); + + while (!HasAllPeers(Stream)) + { + /* wait until we get the timestep metadata or something else changes + */ + STREAM_CONDITION_WAIT(Stream); + } + } + else + { + if (!Stream->ConnectionsToWriter) + { + Stream->ConnectionsToWriter = + calloc(sizeof(CP_PeerConnection), ReturnData->WriterCohortSize); + } + } + + for (int i = 0; i < ReturnData->WriterCohortSize; i++) + { + attr_list attrs = + attr_list_from_string(ReturnData->CP_WriterInfo[i]->ContactInfo); + Stream->ConnectionsToWriter[i].ContactList = attrs; + Stream->ConnectionsToWriter[i].RemoteStreamID = + ReturnData->CP_WriterInfo[i]->WriterID; + } + + // Deref the original connection to writer rank 0 (might still be open as a + // peer) + if 
(Stream->WriterConfigParams->CPCommPattern == SstCPCommPeer) + { + if (rank0_to_rank0_conn) + { + CMConnection_dereference(rank0_to_rank0_conn); + } + } + else + { + /* only rely on the rank 0 to rank 0 that we already have (if we're rank + * 0) */ + if (rank0_to_rank0_conn) + { + CMConnection conn = rank0_to_rank0_conn; + Stream->ConnectionsToWriter[0].CMconn = conn; + CMconn_register_close_handler(conn, ReaderConnCloseHandler, + (void *)Stream); + } + } + Stream->Status = Established; + gettimeofday(&Stop, NULL); + timersub(&Stop, &Start, &Diff); + Stream->OpenTimeSecs = (double)Diff.tv_usec / 1e6 + Diff.tv_sec; + gettimeofday(&Stream->ValidStartTime, NULL); + Stream->Filename = Filename; + Stream->ParamsBlock = free_block; + STREAM_MUTEX_UNLOCK(Stream); + AddToLastCallFreeList(Stream); + Stream->DP_Interface->provideWriterDataToReader( + &Svcs, Stream->DP_Stream, ReturnData->WriterCohortSize, + Stream->ConnectionsToWriter, ReturnData->DP_WriterInfo); + CP_verbose(Stream, PerRankVerbose, + "Sending Reader Activate messages to writer\n"); + memset(&Msg, 0, sizeof(Msg)); + sendOneToEachWriterRank(Stream, + Stream->CPInfo->SharedCM->ReaderActivateFormat, + &Msg, &Msg.WSR_Stream); + CP_verbose(Stream, PerStepVerbose, + "Finish opening Stream \"%s\", starting with Step number %d\n", + Filename, ReturnData->StartingStepNumber); + + return Stream; +} + +// SstReaderGetParams is an SST entry point only called by the main +// program thread. It can only be called after initialization and +// only accesses data installed durinig initialization, it needs no +// locking. 
+extern void SstReaderGetParams(SstStream Stream, + SstMarshalMethod *WriterMarshalMethod, + int *WriterIsRowMajor) +{ + *WriterMarshalMethod = + (SstMarshalMethod)Stream->WriterConfigParams->MarshalMethod; + *WriterIsRowMajor = Stream->WriterConfigParams->IsRowMajor; +} + +/* + * CP_PeerSetupHandler is called by the network handler thread in + * response to incoming PeerSetup messages to setup the reader-side + * Peer list + */ +extern void CP_PeerSetupHandler(CManager cm, CMConnection conn, void *Msg_v, + void *client_data, attr_list attrs) +{ + TAU_START_FUNC(); + SstStream Stream; + struct _PeerSetupMsg *Msg = (struct _PeerSetupMsg *)Msg_v; + Stream = (SstStream)Msg->RS_Stream; + STREAM_MUTEX_LOCK(Stream); + CP_verbose(Stream, TraceVerbose, + "Received peer setup from rank %d, conn %p\n", Msg->WriterRank, + conn); + if (!Stream->ConnectionsToWriter) + { + CP_verbose(Stream, TraceVerbose, "Allocating connections to writer\n"); + Stream->ConnectionsToWriter = + calloc(sizeof(CP_PeerConnection), Msg->WriterCohortSize); + } + CP_verbose(Stream, TraceVerbose, + "Received peer setup from rank %d, conn %p\n", Msg->WriterRank, + conn); + if (Msg->WriterRank != -1) + { + Stream->ConnectionsToWriter[Msg->WriterRank].CMconn = conn; + CMConnection_add_reference(conn); + Stream->FailureContactRank = Msg->WriterRank; + } + CMconn_register_close_handler(conn, ReaderConnCloseHandler, (void *)Stream); + STREAM_CONDITION_SIGNAL(Stream); + STREAM_MUTEX_UNLOCK(Stream); + TAU_STOP_FUNC(); +} + +void queueTimestepMetadataMsgAndNotify(SstStream Stream, + struct _TimestepMetadataMsg *tsm, + CMConnection conn) +{ + STREAM_ASSERT_LOCKED(Stream); + if (tsm->Timestep < Stream->DiscardPriorTimestep) + { + struct _ReleaseTimestepMsg Msg; + memset(&Msg, 0, sizeof(Msg)); + Msg.Timestep = tsm->Timestep; + + /* + * send each writer rank a release for this timestep (actually goes to + * WSR Streams) + */ + if (tsm->Metadata != NULL) + { + CP_verbose(Stream, PerStepVerbose, + "Sending 
ReleaseTimestep message for PRIOR DISCARD " + "timestep %d, one to each writer\n", + tsm->Timestep); + sendOneToEachWriterRank( + Stream, Stream->CPInfo->SharedCM->ReleaseTimestepFormat, &Msg, + &Msg.WSR_Stream); + } + else + { + CP_verbose(Stream, PerStepVerbose, + "Received discard notice for timestep %d, " + "ignoring in PRIOR DISCARD\n", + tsm->Timestep); + } + } + + struct _TimestepMetadataList *New = malloc(sizeof(struct _RequestQueue)); + New->MetadataMsg = tsm; + New->Next = NULL; + if (Stream->Timesteps) + { + struct _TimestepMetadataList *Last = Stream->Timesteps; + while (Last->Next) + { + Last = Last->Next; + } + Last->Next = New; + } + else + { + Stream->Timesteps = New; + } + Stream->Stats.TimestepMetadataReceived++; + if (tsm->Metadata) + { + Stream->Stats.MetadataBytesReceived += + (tsm->Metadata->DataSize + tsm->AttributeData->DataSize); + } + CP_verbose(Stream, PerRankVerbose, + "Received a Timestep metadata message for timestep %d, " + "signaling condition\n", + tsm->Timestep); + + STREAM_CONDITION_SIGNAL(Stream); + if ((Stream->Rank == 0) && + (Stream->WriterConfigParams->CPCommPattern == SstCPCommMin) && + (Stream->ConfigParams->AlwaysProvideLatestTimestep)) + { + /* + * IFF we are in CommMin mode, AND we are to always provide + * the newest timestep, then when a new timestep arrives then + * we want to release timesteps that are older than it, NOT + * INCLUDING ANY TIMESTEP IN CURRENT USE. 
+ */ + CP_verbose(Stream, TraceVerbose, + "Got a new timestep in AlwaysProvideLatestTimestep mode, " + "discard older than %d\n", + tsm->Timestep); + releasePriorTimesteps(Stream, tsm->Timestep); + } +} + +struct _SstMetaMetaBlockInternal +{ + size_t TimestepAdded; + char *BlockData; + size_t BlockSize; + char *ID; + size_t IDSize; +}; + +void AddFormatsToMetaMetaInfo(SstStream Stream, + struct _TimestepMetadataMsg *Msg) +{ + FFSFormatList Formats = Msg->Formats; + while (Formats) + { + Stream->InternalMetaMetaInfo = + realloc(Stream->InternalMetaMetaInfo, + (sizeof(struct _SstMetaMetaBlockInternal) * + (Stream->InternalMetaMetaCount + 1))); + struct _SstMetaMetaBlockInternal *NewInfo = + &Stream->InternalMetaMetaInfo[Stream->InternalMetaMetaCount]; + Stream->InternalMetaMetaCount++; + NewInfo->TimestepAdded = Msg->Timestep; + NewInfo->ID = malloc(Formats->FormatIDRepLen); + NewInfo->IDSize = Formats->FormatIDRepLen; + NewInfo->BlockData = malloc(Formats->FormatServerRepLen); + NewInfo->BlockSize = Formats->FormatServerRepLen; + memcpy(NewInfo->ID, Formats->FormatIDRep, Formats->FormatIDRepLen); + memcpy(NewInfo->BlockData, Formats->FormatServerRep, + Formats->FormatServerRepLen); + Formats = Formats->Next; + } +} + +void AddAttributesToAttrDataList(SstStream Stream, + struct _TimestepMetadataMsg *Msg) +{ + if (Stream->AttrsRetrieved) + { + int i = 0; + while (Stream->InternalAttrDataInfo && + Stream->InternalAttrDataInfo[i].BlockData) + { + free(Stream->InternalAttrDataInfo[i].BlockData); + i++; + } + free(Stream->InternalAttrDataInfo); + Stream->InternalAttrDataInfo = NULL; + Stream->InternalAttrDataCount = 0; + Stream->AttrsRetrieved = 0; + } + if (Msg->AttributeData->DataSize == 0) + return; + + Stream->InternalAttrDataInfo = realloc( + Stream->InternalAttrDataInfo, + (sizeof(struct _SstBlock) * (Stream->InternalAttrDataCount + 2))); + struct _SstBlock *NewInfo = + &Stream->InternalAttrDataInfo[Stream->InternalAttrDataCount]; + Stream->InternalAttrDataCount++; 
+ NewInfo->BlockData = malloc(Msg->AttributeData->DataSize); + NewInfo->BlockSize = Msg->AttributeData->DataSize; + memcpy(NewInfo->BlockData, Msg->AttributeData->block, + Msg->AttributeData->DataSize); + memset(&Stream->InternalAttrDataInfo[Stream->InternalAttrDataCount], 0, + sizeof(struct _SstData)); +} + +// CP_TimestepMetadataHandler is called by the network handler thread +// to handle incoming TimestepMetadata messages +void CP_TimestepMetadataHandler(CManager cm, CMConnection conn, void *Msg_v, + void *client_data, attr_list attrs) +{ + TAU_START_FUNC(); + SstStream Stream; + struct _TimestepMetadataMsg *Msg = (struct _TimestepMetadataMsg *)Msg_v; + Stream = (SstStream)Msg->RS_Stream; + STREAM_MUTEX_LOCK(Stream); + if ((Stream->Rank != 0) || + (Stream->WriterConfigParams->CPCommPattern == SstCPCommPeer)) + { + /* All ranks are getting this */ + if (Msg->Metadata == NULL) + { + CP_verbose( + Stream, PerRankVerbose, + "Received a message that timestep %d has been discarded\n", + Msg->Timestep); + + /* + * before discarding, install any precious metadata from this + * message + */ + if (Stream->WriterConfigParams->MarshalMethod == SstMarshalFFS) + { + FFSMarshalInstallPreciousMetadata(Stream, Msg); + } + else if (Stream->WriterConfigParams->MarshalMethod == SstMarshalBP5) + { + AddFormatsToMetaMetaInfo(Stream, Msg); + AddAttributesToAttrDataList(Stream, Msg); + } + STREAM_MUTEX_UNLOCK(Stream); + + return; + } + else + { + CP_verbose( + Stream, PerStepVerbose, + "Received an incoming metadata message for timestep %d\n", + Msg->Timestep); + } + /* arrange for this message data to stay around */ + CMtake_buffer(cm, Msg); + + queueTimestepMetadataMsgAndNotify(Stream, Msg, conn); + } + else + { + /* I must be rank 0 and only I got this, I'll need to distribute it to + * everyone */ + /* arrange for this message data to stay around */ + CMtake_buffer(cm, Msg); + + queueTimestepMetadataMsgAndNotify(Stream, Msg, conn); + } + STREAM_MUTEX_UNLOCK(Stream); + 
TAU_STOP_FUNC(); +} + +// CP_WriterResponseHandler is called by the network handler thread to +// handle WriterResponse messages. One of these will be sent to rank0 +// reader from rank0 writer in response to the ReaderRegister message. +// It will find rank0 writer in CMCondition_wait(). It's only action +// is to associate the incoming response message to the CMcondition +// we're waiting on,m so no locking is necessary. +void CP_WriterResponseHandler(CManager cm, CMConnection conn, void *Msg_v, + void *client_data, attr_list attrs) +{ + TAU_REGISTER_THREAD(); + TAU_START_FUNC(); + struct _WriterResponseMsg *Msg = (struct _WriterResponseMsg *)Msg_v; + struct _WriterResponseMsg **response_ptr; + // fprintf(stderr, "Received a writer_response message for condition + // %d\n", + // Msg->WriterResponseCondition); + // fprintf(stderr, "The responding writer has cohort of size %d :\n", + // Msg->writer_CohortSize); + // for (int i = 0; i < Msg->writer_CohortSize; i++) { + // fprintf(stderr, " rank %d CP contact info: %s, %p\n", i, + // Msg->CP_WriterInfo[i]->ContactInfo, + // Msg->CP_WriterInfo[i]->WriterID); + // } + + /* arrange for this message data to stay around */ + CMtake_buffer(cm, Msg); + + /* attach the message to the CMCondition so it an be retrieved by the main + * thread */ + response_ptr = + CMCondition_get_client_data(cm, Msg->WriterResponseCondition); + *response_ptr = Msg; + + /* wake the main thread */ + CMCondition_signal(cm, Msg->WriterResponseCondition); + TAU_STOP_FUNC(); +} + +// CP_WriterCloseHandler is called by the network handler thread to +// handle WriterResponse messages. One of these will be sent to rank0 +// reader from rank0 writer in response to the ReaderRegister message. +// It will find rank0 writer in CMCondition_wait(). It's only action +// is to associate the incoming response message to the CMcondition +// we're waiting on, so no locking is necessary. 
+extern void CP_WriterCloseHandler(CManager cm, CMConnection conn, void *Msg_v, + void *client_data, attr_list attrs) +{ + TAU_START_FUNC(); + WriterCloseMsg Msg = (WriterCloseMsg)Msg_v; + SstStream Stream = (SstStream)Msg->RS_Stream; + + STREAM_MUTEX_LOCK(Stream); + CP_verbose(Stream, PerStepVerbose, + "Received a writer close message. " + "Timestep %d was the final timestep.\n", + Msg->FinalTimestep); + + Stream->FinalTimestep = Msg->FinalTimestep; + Stream->Status = PeerClosed; + /* wake anyone that might be waiting */ + STREAM_CONDITION_SIGNAL(Stream); + STREAM_MUTEX_UNLOCK(Stream); + TAU_STOP_FUNC(); +} + +// CP_CommPatternLockedHandler is called by the network handler thread +// to handle CommPatternLocked messages. It can only be called +// post-registration and won't be called after Close. Lock to protect +// against race conditions in determining comm lock scenario. +extern void CP_CommPatternLockedHandler(CManager cm, CMConnection conn, + void *Msg_v, void *client_data, + attr_list attrs) +{ + CommPatternLockedMsg Msg = (CommPatternLockedMsg)Msg_v; + SstStream Stream = (SstStream)Msg->RS_Stream; + + STREAM_MUTEX_LOCK(Stream); + CP_verbose( + Stream, PerStepVerbose, + "Received a CommPatternLocked message, beginning with Timestep %d.\n", + Msg->Timestep); + + Stream->CommPatternLocked = 1; + Stream->CommPatternLockedTimestep = Msg->Timestep; + STREAM_MUTEX_UNLOCK(Stream); +} + +static long MaxQueuedMetadata(SstStream Stream) +{ + struct _TimestepMetadataList *Next; + long MaxTimestep = -1; + STREAM_ASSERT_LOCKED(Stream); + Next = Stream->Timesteps; + if (Next == NULL) + { + CP_verbose(Stream, TraceVerbose, "MaxQueued Timestep returning -1\n"); + return -1; + } + while (Next) + { + if (Next->MetadataMsg->Timestep >= MaxTimestep) + { + MaxTimestep = Next->MetadataMsg->Timestep; + } + Next = Next->Next; + } + CP_verbose(Stream, TraceVerbose, "MaxQueued Timestep returning %ld\n", + MaxTimestep); + return MaxTimestep; +} + +static long 
NextQueuedMetadata(SstStream Stream) +{ + struct _TimestepMetadataList *Next; + long MinTimestep = LONG_MAX; + STREAM_ASSERT_LOCKED(Stream); + Next = Stream->Timesteps; + if (Next == NULL) + { + CP_verbose(Stream, TraceVerbose, "NextQueued Timestep returning -1\n"); + return -1; + } + while (Next) + { + if (Next->MetadataMsg->Timestep <= MinTimestep) + { + MinTimestep = Next->MetadataMsg->Timestep; + } + Next = Next->Next; + } + CP_verbose(Stream, TraceVerbose, "NextQueued Timestep returning %ld\n", + MinTimestep); + return MinTimestep; +} + +// A delayed task to wake the stream after a specific time period +static void triggerDataCondition(CManager cm, void *vStream) +{ + SstStream Stream = (SstStream)vStream; + + STREAM_MUTEX_LOCK(Stream); + /* wake the sleeping main thread for timeout */ + STREAM_CONDITION_SIGNAL(Stream); + STREAM_MUTEX_UNLOCK(Stream); +} + +static void waitForMetadataWithTimeout(SstStream Stream, float timeout_secs) +{ + struct _TimestepMetadataList *Next; + struct timeval start, now, end; + int timeout_int_sec = floor(timeout_secs); + int timeout_int_usec = ((timeout_secs - floorf(timeout_secs)) * 1000000); + CMTaskHandle TimeoutTask = NULL; + + STREAM_ASSERT_LOCKED(Stream); + gettimeofday(&start, NULL); + Next = Stream->Timesteps; + CP_verbose( + Stream, PerRankVerbose, + "Wait for metadata with timeout %g secs starting at time %ld.%06ld \n", + timeout_secs, start.tv_sec, start.tv_usec); + if (Next) + { + CP_verbose(Stream, PerRankVerbose, + "Returning from wait with timeout, NO TIMEOUT\n"); + } + end.tv_sec = start.tv_sec + timeout_int_sec; + end.tv_usec = start.tv_usec + timeout_int_usec; + if (end.tv_usec > 1000000) + { + end.tv_sec++; + end.tv_usec -= 1000000; + } + if (end.tv_sec < start.tv_sec) + { + // rollover + end.tv_sec = INT_MAX; + } + // special case + if (timeout_secs == 0.0) + { + CP_verbose( + Stream, PerRankVerbose, + "Returning from wait With no data after zero timeout poll\n"); + return; + } + + TimeoutTask = + 
CMadd_delayed_task(Stream->CPInfo->SharedCM->cm, timeout_int_sec, + timeout_int_usec, triggerDataCondition, Stream); + while (1) + { + Next = Stream->Timesteps; + if (Next) + { + CMremove_task(TimeoutTask); + CP_verbose(Stream, PerRankVerbose, + "Returning from wait with timeout, NO TIMEOUT\n"); + return; + } + if (Stream->Status != Established) + { + CP_verbose(Stream, PerRankVerbose, + "Returning from wait with timeout, STREAM NO " + "LONGER ESTABLISHED\n"); + return; + } + gettimeofday(&now, NULL); + CP_verbose(Stream, TraceVerbose, + "timercmp, now is %ld.%06ld end is %ld.%06ld \n", + now.tv_sec, now.tv_usec, end.tv_sec, end.tv_usec); + if (timercmp(&now, &end, >)) + { + CP_verbose(Stream, PerRankVerbose, + "Returning from wait after timing out\n"); + return; + } + /* wait until we get the timestep metadata or something else changes */ + STREAM_CONDITION_WAIT(Stream); + } + /* NOTREACHED */ +} + +static void releasePriorTimesteps(SstStream Stream, long Latest) +{ + struct _TimestepMetadataList *Next, *Last; + STREAM_ASSERT_LOCKED(Stream); + CP_verbose(Stream, PerRankVerbose, + "Releasing any timestep earlier than %d\n", Latest); + Next = Stream->Timesteps; + Last = NULL; + while (Next) + { + if ((Next->MetadataMsg->Timestep < Latest) && + (Next->MetadataMsg->Timestep != Stream->CurrentWorkingTimestep)) + { + struct _TimestepMetadataList *This = Next; + struct _ReleaseTimestepMsg Msg; + Next = This->Next; + + /* + * before discarding, install any precious metadata from this + * message + */ + if (Stream->WriterConfigParams->MarshalMethod == SstMarshalFFS) + { + FFSMarshalInstallPreciousMetadata(Stream, This->MetadataMsg); + } + else if (Stream->WriterConfigParams->MarshalMethod == SstMarshalBP5) + { + AddFormatsToMetaMetaInfo(Stream, This->MetadataMsg); + AddAttributesToAttrDataList(Stream, This->MetadataMsg); + } + + memset(&Msg, 0, sizeof(Msg)); + Msg.Timestep = This->MetadataMsg->Timestep; + + /* + * send each writer rank a release for this timestep (actually 
goes + * to WSR + * Streams) + */ + CP_verbose(Stream, PerRankVerbose, + "Sending ReleaseTimestep message for RELEASE " + "PRIOR timestep %d, one to each writer\n", + This->MetadataMsg->Timestep); + + if (Last == NULL) + { + Stream->Timesteps = Next; + } + else + { + Last->Next = Next; + } + STREAM_MUTEX_UNLOCK(Stream); + sendOneToEachWriterRank( + Stream, Stream->CPInfo->SharedCM->ReleaseTimestepFormat, &Msg, + &Msg.WSR_Stream); + if (This->MetadataMsg == NULL) + printf("READER RETURN_BUFFER, metadatamsg == %p, line %d\n", + This->MetadataMsg, __LINE__); + CMreturn_buffer(Stream->CPInfo->SharedCM->cm, This->MetadataMsg); + STREAM_MUTEX_LOCK(Stream); + free(This); + } + else + { + Last = Next; + Next = Next->Next; + } + } +} + +static void FreeTimestep(SstStream Stream, long Timestep) +{ + /* + * remove local metadata for that timestep + */ + struct _TimestepMetadataList *List = Stream->Timesteps; + + STREAM_ASSERT_LOCKED(Stream); + if (Stream->Timesteps->MetadataMsg->Timestep == Timestep) + { + Stream->Timesteps = List->Next; + if (List->MetadataMsg == NULL) + printf("READER RETURN_BUFFER, List->MEtadataMsg == %p, line %d\n", + List->MetadataMsg, __LINE__); + CMreturn_buffer(Stream->CPInfo->SharedCM->cm, List->MetadataMsg); + + free(List); + } + else + { + struct _TimestepMetadataList *last = List; + List = List->Next; + while (List != NULL) + { + if (List->MetadataMsg->Timestep == Timestep) + { + last->Next = List->Next; + if (List->MetadataMsg == NULL) + printf("READER RETURN_BUFFER, List->MEtadataMsg == %p, " + "line %d\n", + List->MetadataMsg, __LINE__); + CMreturn_buffer(Stream->CPInfo->SharedCM->cm, + List->MetadataMsg); + + free(List); + break; + } + last = List; + List = List->Next; + } + } +} + +static TSMetadataList waitForNextMetadata(SstStream Stream, long LastTimestep) +{ + TSMetadataList FoundTS = NULL; + CP_verbose(Stream, PerRankVerbose, + "Wait for next metadata after last timestep %d\n", LastTimestep); + while (1) + { + struct 
_TimestepMetadataList *Next; + Next = Stream->Timesteps; + while (Next) + { + CP_verbose(Stream, TraceVerbose, + "Examining metadata for Timestep %d\n", + Next->MetadataMsg->Timestep); + if (((Next->MetadataMsg->Metadata == NULL) || + (Next->MetadataMsg->Timestep < + Stream->DiscardPriorTimestep)) && + (FoundTS == NULL)) + { + /* + * Either this is a dummy timestep for something that + * was discarded on the writer side, or it is a + * timestep that satisfies DiscardPriorTimestep and + * we've already sent a release for it. Now is the + * time to install the 'precious' info that it carried + * (Attributes and formats) and then discard it. + */ + CP_verbose(Stream, PerRankVerbose, + "SstAdvanceStep installing precious " + "metadata for discarded TS %d\n", + Next->MetadataMsg->Timestep); + FFSMarshalInstallPreciousMetadata(Stream, Next->MetadataMsg); + if (Stream->WriterConfigParams->MarshalMethod == SstMarshalBP5) + { + AddFormatsToMetaMetaInfo(Stream, Next->MetadataMsg); + AddAttributesToAttrDataList(Stream, Next->MetadataMsg); + } + TSMetadataList Tmp = Next; + Next = Next->Next; + FreeTimestep(Stream, Tmp->MetadataMsg->Timestep); + continue; + } + if (Next->MetadataMsg->Timestep >= LastTimestep) + { + if ((FoundTS == NULL) && + (Next->MetadataMsg->Timestep > LastTimestep)) + { + FoundTS = Next; + break; + } + else if ((FoundTS != NULL) && (FoundTS->MetadataMsg->Timestep > + Next->MetadataMsg->Timestep)) + { + FoundTS = Next; + break; + } + } + Next = Next->Next; + } + if (FoundTS) + { + CP_verbose(Stream, PerRankVerbose, + "Returning metadata for Timestep %d\n", + FoundTS->MetadataMsg->Timestep); + Stream->CurrentWorkingTimestep = FoundTS->MetadataMsg->Timestep; + return FoundTS; + } + /* didn't find a good next timestep, check Stream status */ + if ((Stream->Status != Established) || + ((Stream->FinalTimestep != INT_MAX) && + (Stream->FinalTimestep >= LastTimestep))) + { + CP_verbose(Stream, TraceVerbose, + "Stream Final Timestep is %d, last timestep was %d\n", 
+ Stream->FinalTimestep, LastTimestep); + if (Stream->Status == NotOpen) + { + CP_verbose(Stream, PerRankVerbose, + "Wait for next metadata returning NULL because " + "channel was never fully established\n"); + } + else if (Stream->Status == PeerFailed) + { + CP_verbose(Stream, PerRankVerbose, + "Wait for next metadata returning NULL because " + "the connection failed before final timestep " + "notification\n"); + } + else + { + CP_verbose(Stream, PerStepVerbose, + "Wait for next metadata returning NULL, status %d ", + Stream->Status); + } + /* closed or failed, return NULL */ + Stream->CurrentWorkingTimestep = -1; + return NULL; + } + CP_verbose(Stream, PerRankVerbose, + "Waiting for metadata for a Timestep later than TS %d\n", + LastTimestep); + CP_verbose(Stream, TraceVerbose, + "(PID %lx, TID %lx) Stream status is %s\n", (long)getpid(), + (long)gettid(), SSTStreamStatusStr[Stream->Status]); + /* wait until we get the timestep metadata or something else changes */ + STREAM_CONDITION_WAIT(Stream); + } + /* NOTREACHED */ +} + +// SstGetCurMetadata is an SST entry point only called by the main +// program thread. Only accesses the CurrentMetadata field which is +// touched only by other subroutines called by the main program +// thread, it needs no locking. 
+extern SstFullMetadata SstGetCurMetadata(SstStream Stream) +{ + return Stream->CurrentMetadata; +} + +extern SstMetaMetaList SstGetNewMetaMetaData(SstStream Stream, long Timestep) +{ + int RetCount = 0; + for (int i = 0; i < Stream->InternalMetaMetaCount; i++) + { + if (Stream->InternalMetaMetaInfo[i].TimestepAdded >= Timestep) + RetCount++; + } + if (RetCount == 0) + return NULL; + SstMetaMetaList ret = malloc(sizeof(ret[0]) * (RetCount + 1)); + int j = 0; + for (int i = 0; i < Stream->InternalMetaMetaCount; i++) + { + if (Stream->InternalMetaMetaInfo[i].TimestepAdded >= Timestep) + { + // no copies, keep memory ownership in SST + ret[j].BlockData = Stream->InternalMetaMetaInfo[i].BlockData; + ret[j].BlockSize = Stream->InternalMetaMetaInfo[i].BlockSize; + ret[j].ID = Stream->InternalMetaMetaInfo[i].ID; + ret[j].IDSize = Stream->InternalMetaMetaInfo[i].IDSize; + j++; + } + } + memset(&ret[j], 0, sizeof(ret[j])); + return ret; +} + +extern SstBlock SstGetAttributeData(SstStream Stream, long Timestep) +{ + Stream->AttrsRetrieved = 1; + return Stream->InternalAttrDataInfo; +} + +static void AddToReadStats(SstStream Stream, int Rank, long Timestep, + size_t Length) +{ + if (!Stream->RanksRead) + Stream->RanksRead = calloc(1, Stream->WriterCohortSize); + Stream->RanksRead[Rank] = 1; + Stream->Stats.BytesRead += Length; +} + +#ifndef min +#define min(a, b) (((a) < (b)) ? 
(a) : (b)) +#endif + +static void ReleaseTSReadStats(SstStream Stream, long Timestep) +{ + int ThisFanIn = 0; + if (Stream->RanksRead) + { + for (int i = 0; i < Stream->WriterCohortSize; i++) + { + if (Stream->RanksRead[i]) + ThisFanIn++; + } + memset(Stream->RanksRead, 0, Stream->WriterCohortSize); + } + if (Stream->Stats.TimestepsConsumed == 1) + { + Stream->Stats.RunningFanIn = ThisFanIn; + } + else + { + Stream->Stats.RunningFanIn = + Stream->Stats.RunningFanIn + + ((double)ThisFanIn - Stream->Stats.RunningFanIn) / + min(Stream->Stats.TimestepsConsumed, 100); + } +} + +// SstReadRemotememory is only called by the main +// program thread. +extern void *SstReadRemoteMemory(SstStream Stream, int Rank, long Timestep, + size_t Offset, size_t Length, void *Buffer, + void *DP_TimestepInfo) +{ + if (Stream->ConfigParams->ReaderShortCircuitReads) + return NULL; + Stream->Stats.BytesTransferred += Length; + AddToReadStats(Stream, Rank, Timestep, Length); + return Stream->DP_Interface->readRemoteMemory( + &Svcs, Stream->DP_Stream, Rank, Timestep, Offset, Length, Buffer, + DP_TimestepInfo); +} + +static void sendOneToEachWriterRank(SstStream Stream, CMFormat f, void *Msg, + void **WS_StreamPtr) +{ + if (Stream->WriterConfigParams->CPCommPattern == SstCPCommPeer) + { + int i = 0; + while (Stream->Peers[i] != -1) + { + int peer = Stream->Peers[i]; + CMConnection conn = Stream->ConnectionsToWriter[peer].CMconn; + /* add the writer Stream identifier to each outgoing + * message */ + *WS_StreamPtr = Stream->ConnectionsToWriter[peer].RemoteStreamID; + if (CMwrite(conn, f, Msg) != 1) + { + switch (Stream->Status) + { + case NotOpen: + case Opening: + case Established: + CP_verbose(Stream, CriticalVerbose, + "Message failed to send to writer %d (%p)\n", + peer, *WS_StreamPtr); + break; + case PeerClosed: + case PeerFailed: + case Closed: + case Destroyed: + // Don't warn on send failures for closing/closed clients + break; + } + } + i++; + } + } + else + { + if (Stream->Rank == 0) 
+ { + int peer = 0; + CMConnection conn = Stream->ConnectionsToWriter[peer].CMconn; + /* add the writer Stream identifier to each outgoing + * message */ + *WS_StreamPtr = Stream->ConnectionsToWriter[peer].RemoteStreamID; + if (CMwrite(conn, f, Msg) != 1) + { + switch (Stream->Status) + { + case NotOpen: + case Opening: + case Established: + CP_verbose(Stream, CriticalVerbose, + "Message failed to send to writer %d (%p)\n", + peer, *WS_StreamPtr); + break; + case PeerClosed: + case PeerFailed: + case Closed: + case Destroyed: + // Don't warn on send failures for closing/closed clients + break; + } + } + } + } +} + +// SstReaderDefinitionLock is only called by the main +// program thread. +extern void SstReaderDefinitionLock(SstStream Stream, long EffectiveTimestep) +{ + struct _LockReaderDefinitionsMsg Msg; + + memset(&Msg, 0, sizeof(Msg)); + Msg.Timestep = EffectiveTimestep; + + sendOneToEachWriterRank( + Stream, Stream->CPInfo->SharedCM->LockReaderDefinitionsFormat, &Msg, + &Msg.WSR_Stream); +} + +// SstReleaseStep is only called by the main program thread. It +// locks to protect the timestep list before freeing the local +// representation of the released timestep. 
+extern void SstReleaseStep(SstStream Stream) +{ + long Timestep = Stream->ReaderTimestep; + struct _ReleaseTimestepMsg Msg; + + TAU_START_FUNC(); + STREAM_MUTEX_LOCK(Stream); + if (Stream->DP_Interface->RSReleaseTimestep) + { + (Stream->DP_Interface->RSReleaseTimestep)(&Svcs, Stream->DP_Stream, + Timestep); + } + ReleaseTSReadStats(Stream, Timestep); + STREAM_MUTEX_UNLOCK(Stream); + + if ((Stream->WriterConfigParams->CPCommPattern == SstCPCommPeer) || + (Stream->Rank == 0)) + { + STREAM_MUTEX_LOCK(Stream); + FreeTimestep(Stream, Timestep); + STREAM_MUTEX_UNLOCK(Stream); + } + + SMPI_Barrier(Stream->mpiComm); + + memset(&Msg, 0, sizeof(Msg)); + Msg.Timestep = Timestep; + + /* + * send each writer rank a release for this timestep (actually goes to WSR + * Streams) + */ + CP_verbose( + Stream, PerRankVerbose, + "Sending ReleaseTimestep message for timestep %d, one to each writer\n", + Timestep); + sendOneToEachWriterRank(Stream, + Stream->CPInfo->SharedCM->ReleaseTimestepFormat, + &Msg, &Msg.WSR_Stream); + + if (Stream->WriterConfigParams->MarshalMethod == SstMarshalFFS) + { + FFSClearTimestepData(Stream); + } + TAU_STOP_FUNC(); +} + +static void NotifyDPArrivedMetadata(SstStream Stream, + struct _TimestepMetadataMsg *MetadataMsg) +{ + if ((MetadataMsg->Metadata != NULL) && + (MetadataMsg->Timestep > Stream->LastDPNotifiedTimestep)) + { + if (Stream->DP_Interface->timestepArrived) + { + Stream->DP_Interface->timestepArrived(&Svcs, Stream->DP_Stream, + MetadataMsg->Timestep, + MetadataMsg->PreloadMode); + } + Stream->LastDPNotifiedTimestep = MetadataMsg->Timestep; + } +} + +/* + * wait for metadata for Timestep indicated to arrive, or fail with EndOfStream + * or Error + */ +static SstStatusValue SstAdvanceStepPeer(SstStream Stream, SstStepMode mode, + const float timeout_sec) +{ + + TSMetadataList Entry; + + TAU_START("Waiting on metadata per rank per timestep"); + + if ((timeout_sec >= 0.0) || (mode == SstLatestAvailable)) + { + struct _GlobalOpInfo + { + float 
timeout_sec; + int mode; + long LatestTimestep; + }; + struct _GlobalOpInfo my_info; + struct _GlobalOpInfo *global_info = NULL; + long NextTimestep; + + if (Stream->Rank == 0) + { + global_info = malloc(sizeof(my_info) * Stream->CohortSize); + CP_verbose(Stream, PerRankVerbose, + "In special case of advancestep, mode is %d, " + "Timeout Sec is %g, flt_max is %g\n", + mode, timeout_sec, FLT_MAX); + } + my_info.LatestTimestep = MaxQueuedMetadata(Stream); + my_info.timeout_sec = timeout_sec; + my_info.mode = mode; + SMPI_Gather(&my_info, sizeof(my_info), SMPI_CHAR, global_info, + sizeof(my_info), SMPI_CHAR, 0, Stream->mpiComm); + if (Stream->Rank == 0) + { + long Biggest = -1; + long Smallest = LONG_MAX; + for (int i = 0; i < Stream->CohortSize; i++) + { + if (global_info[i].LatestTimestep > Biggest) + { + Biggest = global_info[i].LatestTimestep; + } + if (global_info[i].LatestTimestep < Smallest) + { + Smallest = global_info[i].LatestTimestep; + } + } + + free(global_info); + + /* + * Several situations are possible here, depending upon + * whether or not a timeout is specified and/or + * LatestAvailable is specified, and whether or not we + * have timesteps queued anywhere. If they want + * LatestAvailable and we have any Timesteps queued + * anywhere, we decide upon a timestep to return and + * assume that all ranks will get it soon (or else we're + * in failure mode). If there are no timesteps queued + * anywhere, then we're going to wait for timeout seconds + * ON RANK 0. RANK 0 AND ONLY RANK 0 WILL DECIDE IF WE + * TIMEOUT OR RETURN WITH DATA. It is possible that other + * ranks get timestep metadata before the timeout expires, + * but we don't care. Whatever would happen on rank 0 is + * what happens everywhere. 
+ */ + + if (Biggest == -1) + { + // AllQueuesEmpty + if (timeout_sec >= 0.0) + { + waitForMetadataWithTimeout(Stream, timeout_sec); + } + else + { + waitForMetadataWithTimeout(Stream, FLT_MAX); + } + NextTimestep = + MaxQueuedMetadata(Stream); /* might be -1 if we timed out */ + } + else + { + /* + * we've actually got a choice here. "Smallest" is + * the LatestTimestep that everyone has. "Biggest" is + * the Latest that someone has seen, and presumably + * others will see shortly. I'm going to go with Biggest + * until I have a reason to prefer one or the other. + */ + if (mode == SstLatestAvailable) + { + // latest available + CP_verbose(Stream, PerRankVerbose, + "Returning Biggest timestep available " + "%ld because LatestAvailable " + "specified\n", + Biggest); + NextTimestep = Biggest; + } + else + { + // next available (take the oldest that everyone has) + CP_verbose(Stream, PerRankVerbose, + "Returning Smallest timestep available " + "%ld because NextAvailable specified\n", + Smallest); + NextTimestep = Smallest; + } + } + if ((NextTimestep == -1) && (Stream->Status == PeerClosed)) + { + /* force everyone to close */ + NextTimestep = -2; + } + if ((NextTimestep == -1) && (Stream->Status == PeerFailed)) + { + /* force everyone to return failed */ + NextTimestep = -3; + } + SMPI_Bcast(&NextTimestep, 1, SMPI_LONG, 0, Stream->mpiComm); + } + else + { + STREAM_MUTEX_UNLOCK(Stream); + SMPI_Bcast(&NextTimestep, 1, SMPI_LONG, 0, Stream->mpiComm); + STREAM_MUTEX_LOCK(Stream); + } + if (NextTimestep == -2) + { + /* there was a peerClosed setting on rank0, we'll close */ + Stream->Status = PeerClosed; + CP_verbose(Stream, PerStepVerbose, + "SstAdvanceStep returning EndOfStream at timestep %d\n", + Stream->ReaderTimestep); + return SstEndOfStream; + } + if (NextTimestep == -3) + { + /* there was a peerFailed setting on rank0, we'll fail */ + Stream->Status = PeerFailed; + CP_verbose(Stream, PerStepVerbose, + "SstAdvanceStep returning EndOfStream at timestep %d\n", + 
Stream->ReaderTimestep); + STREAM_MUTEX_UNLOCK(Stream); + Stream->DP_Interface->notifyConnFailure(&Svcs, Stream->DP_Stream, + 0); + STREAM_MUTEX_LOCK(Stream); + return SstFatalError; + } + if (NextTimestep == -1) + { + CP_verbose(Stream, PerStepVerbose, + "AdvancestepPeer timing out on no data\n"); + return SstTimeout; + } + if (mode == SstLatestAvailable) + { + // latest available + /* release all timesteps from before NextTimestep, then fall + * through below */ + /* Side note: It is possible that someone could get a "prior" + * timestep after this point. It has to be released upon + * arrival */ + CP_verbose(Stream, PerStepVerbose, + "timed or Latest timestep, determined NextTimestep %d\n", + NextTimestep); + Stream->DiscardPriorTimestep = NextTimestep; + releasePriorTimesteps(Stream, NextTimestep); + } + } + + Entry = waitForNextMetadata(Stream, Stream->ReaderTimestep); + + TAU_STOP("Waiting on metadata per rank per timestep"); + + if (Entry) + { + NotifyDPArrivedMetadata(Stream, Entry->MetadataMsg); + + if (Stream->WriterConfigParams->MarshalMethod == SstMarshalFFS) + { + TAU_START("FFS marshaling case"); + FFSMarshalInstallMetadata(Stream, Entry->MetadataMsg); + TAU_STOP("FFS marshaling case"); + } + else if (Stream->WriterConfigParams->MarshalMethod == SstMarshalBP5) + { + AddFormatsToMetaMetaInfo(Stream, Entry->MetadataMsg); + AddAttributesToAttrDataList(Stream, Entry->MetadataMsg); + } + Stream->ReaderTimestep = Entry->MetadataMsg->Timestep; + SstFullMetadata Mdata = malloc(sizeof(struct _SstFullMetadata)); + memset(Mdata, 0, sizeof(struct _SstFullMetadata)); + Mdata->WriterCohortSize = Entry->MetadataMsg->CohortSize; + Mdata->WriterMetadata = + malloc(sizeof(Mdata->WriterMetadata[0]) * Mdata->WriterCohortSize); + for (int i = 0; i < Mdata->WriterCohortSize; i++) + { + Mdata->WriterMetadata[i] = &Entry->MetadataMsg->Metadata[i]; + } + if (Stream->DP_Interface->TimestepInfoFormats == NULL) + { + // DP didn't provide struct info, no valid data + 
Mdata->DP_TimestepInfo = NULL; + } + else + { + Mdata->DP_TimestepInfo = Entry->MetadataMsg->DP_TimestepInfo; + } + Stream->CurrentWorkingTimestep = Entry->MetadataMsg->Timestep; + Stream->CurrentMetadata = Mdata; + + CP_verbose(Stream, PerStepVerbose, + "SstAdvanceStep returning Success on timestep %d\n", + Entry->MetadataMsg->Timestep); + return SstSuccess; + } + if (Stream->Status == PeerClosed) + { + CP_verbose(Stream, PerStepVerbose, + "SstAdvanceStepPeer returning EndOfStream at timestep %d\n", + Stream->ReaderTimestep); + return SstEndOfStream; + } + else + { + CP_verbose(Stream, PerStepVerbose, + "SstAdvanceStep returning FatalError at timestep %d\n", + Stream->ReaderTimestep); + return SstFatalError; + } +} + +static SstStatusValue SstAdvanceStepMin(SstStream Stream, SstStepMode mode, + const float timeout_sec) +{ + TSMetadataDistributionMsg ReturnData; + struct _TimestepMetadataMsg *MetadataMsg; + SstStatusValue ret; + + void *free_block; + + if (Stream->Rank == 0) + { + struct _TimestepMetadataDistributionMsg msg; + SstStatusValue return_value = SstSuccess; + TSMetadataList RootEntry = NULL; + + memset(&msg, 0, sizeof(msg)); + msg.TSmsg = NULL; + msg.CommPatternLockedTimestep = -1; + if (Stream->CommPatternLocked == 1) + { + msg.CommPatternLockedTimestep = Stream->CommPatternLockedTimestep; + } + if ((timeout_sec >= 0.0) || (mode == SstLatestAvailable)) + { + long NextTimestep = -1; + long LatestTimestep = MaxQueuedMetadata(Stream); + /* + * Several situations are possible here, depending upon + * whether or not a timeout is specified and/or + * LatestAvailable is specified, and whether or not we + * have timesteps queued anywhere. If they want + * LatestAvailable and we have any Timesteps queued + * anywhere, we decide upon a timestep to return and + * assume that all ranks will get it soon (or else we're + * in failure mode). If there are no timesteps queued + * anywhere, then we're going to wait for timeout seconds + * ON RANK 0. 
RANK 0 AND ONLY RANK 0 WILL DECIDE IF WE + * TIMEOUT OR RETURN WITH DATA. It is possible that other + * ranks get timestep metadata before the timeout expires, + * but we don't care. Whatever would happen on rank 0 is + * what happens everywhere. + */ + + if (LatestTimestep == -1) + { + // AllQueuesEmpty + if (timeout_sec >= 0.0) + { + waitForMetadataWithTimeout(Stream, timeout_sec); + } + else + { + waitForMetadataWithTimeout(Stream, FLT_MAX); + } + NextTimestep = + MaxQueuedMetadata(Stream); /* might be -1 if we timed out */ + } + else + { + if (mode == SstLatestAvailable) + { + // latest available + CP_verbose(Stream, PerStepVerbose, + "Returning latest timestep available " + "%ld because LatestAvailable " + "specified\n", + LatestTimestep); + NextTimestep = LatestTimestep; + } + else + { + // next available (take the oldest that everyone has) + NextTimestep = NextQueuedMetadata(Stream); + CP_verbose(Stream, PerStepVerbose, + "Returning Smallest timestep available " + "%ld because NextAvailable specified\n", + NextTimestep); + } + } + if (Stream->Status == PeerFailed) + { + CP_verbose(Stream, PerStepVerbose, + "SstAdvanceStepMin returning FatalError because of " + "connection failure at timestep %d\n", + Stream->ReaderTimestep); + return_value = SstFatalError; + } + else if ((NextTimestep == -1) && (Stream->Status == PeerClosed)) + { + CP_verbose( + Stream, PerStepVerbose, + "SstAdvanceStepMin returning EndOfStream at timestep %d\n", + Stream->ReaderTimestep); + return_value = SstEndOfStream; + } + else if (NextTimestep == -1) + { + CP_verbose(Stream, PerStepVerbose, + "AdvancestepMin timing out on no data\n"); + return_value = SstTimeout; + } + else if (mode == SstLatestAvailable) + { + // latest available + /* release all timesteps from before NextTimestep, then fall + * through below */ + /* Side note: It is possible that someone could get a "prior" + * timestep after this point. 
It has to be released upon + * arrival */ + CP_verbose( + Stream, PerStepVerbose, + "timed or Latest timestep, determined NextTimestep %d\n", + NextTimestep); + Stream->DiscardPriorTimestep = NextTimestep; + releasePriorTimesteps(Stream, NextTimestep); + } + } + if (Stream->Status == PeerFailed) + { + CP_verbose(Stream, PerStepVerbose, + "SstAdvanceStepMin returning FatalError because of " + "conn failure at timestep %d\n", + Stream->ReaderTimestep); + return_value = SstFatalError; + } + if (return_value == SstSuccess) + { + RootEntry = waitForNextMetadata(Stream, Stream->ReaderTimestep); + } + if (RootEntry) + { + msg.TSmsg = RootEntry->MetadataMsg; + msg.ReturnValue = return_value; + CP_verbose(Stream, TraceVerbose, + "Setting TSmsg to Rootentry value\n"); + } + else + { + if (return_value == SstSuccess) + { + if (Stream->Status == PeerClosed) + { + CP_verbose(Stream, PerStepVerbose, + "SstAdvanceStepMin rank 0 returning " + "EndOfStream at timestep %d\n", + Stream->ReaderTimestep); + msg.ReturnValue = SstEndOfStream; + } + else + { + CP_verbose(Stream, PerStepVerbose, + "SstAdvanceStepMin rank 0 returning " + "FatalError at timestep %d\n", + Stream->ReaderTimestep); + msg.ReturnValue = SstFatalError; + } + CP_verbose(Stream, TraceVerbose, "Setting TSmsg to NULL\n"); + msg.TSmsg = NULL; + } + else + { + msg.ReturnValue = return_value; + } + } + // AddArrivedMetadataInfo(Stream, &msg); + ReturnData = CP_distributeDataFromRankZero( + Stream, &msg, Stream->CPInfo->TimestepDistributionFormat, + &free_block); + } + else + { + + STREAM_MUTEX_UNLOCK(Stream); + ReturnData = CP_distributeDataFromRankZero( + Stream, NULL, Stream->CPInfo->CombinedWriterInfoFormat, + &free_block); + STREAM_MUTEX_LOCK(Stream); + } + ret = (SstStatusValue)ReturnData->ReturnValue; + + if (ReturnData->ReturnValue != SstSuccess) + { + if ((Stream->WriterConfigParams->MarshalMethod == SstMarshalFFS) && + (ReturnData->TSmsg)) + { + CP_verbose( + Stream, PerRankVerbose, + "SstAdvanceStep installing 
precious metadata before exiting\n"); + FFSMarshalInstallPreciousMetadata(Stream, ReturnData->TSmsg); + } + else if ((Stream->WriterConfigParams->MarshalMethod == SstMarshalBP5) && + (ReturnData->TSmsg)) + { + AddFormatsToMetaMetaInfo(Stream, ReturnData->TSmsg); + AddAttributesToAttrDataList(Stream, ReturnData->TSmsg); + } + + free(free_block); + CP_verbose(Stream, PerStepVerbose, + "SstAdvanceStep returning FAILURE\n"); + return ret; + } + MetadataMsg = ReturnData->TSmsg; + + if (ReturnData->CommPatternLockedTimestep != -1) + { + Stream->CommPatternLockedTimestep = + ReturnData->CommPatternLockedTimestep; + Stream->CommPatternLocked = 2; + STREAM_MUTEX_UNLOCK(Stream); + if (Stream->DP_Interface->RSreadPatternLocked) + { + Stream->DP_Interface->RSreadPatternLocked( + &Svcs, Stream->DP_Stream, Stream->CommPatternLockedTimestep); + } + STREAM_MUTEX_LOCK(Stream); + } + if (MetadataMsg) + { + NotifyDPArrivedMetadata(Stream, MetadataMsg); + + Stream->ReaderTimestep = MetadataMsg->Timestep; + if (Stream->WriterConfigParams->MarshalMethod == SstMarshalFFS) + { + CP_verbose(Stream, TraceVerbose, + "Calling install metadata from metadata block %p\n", + MetadataMsg); + FFSMarshalInstallMetadata(Stream, MetadataMsg); + } + else if (Stream->WriterConfigParams->MarshalMethod == SstMarshalBP5) + { + AddFormatsToMetaMetaInfo(Stream, MetadataMsg); + AddAttributesToAttrDataList(Stream, MetadataMsg); + } + SstFullMetadata Mdata = malloc(sizeof(struct _SstFullMetadata)); + memset(Mdata, 0, sizeof(struct _SstFullMetadata)); + Mdata->WriterCohortSize = MetadataMsg->CohortSize; + Mdata->WriterMetadata = + malloc(sizeof(Mdata->WriterMetadata[0]) * Mdata->WriterCohortSize); + for (int i = 0; i < Mdata->WriterCohortSize; i++) + { + Mdata->WriterMetadata[i] = &MetadataMsg->Metadata[i]; + } + if (Stream->DP_Interface->TimestepInfoFormats == NULL) + { + // DP didn't provide struct info, no valid data + Mdata->DP_TimestepInfo = NULL; + } + else + { + Mdata->DP_TimestepInfo = 
MetadataMsg->DP_TimestepInfo; + } + Stream->CurrentWorkingTimestep = MetadataMsg->Timestep; + Mdata->FreeBlock = free_block; + Stream->CurrentMetadata = Mdata; + + CP_verbose(Stream, PerStepVerbose, + "SstAdvanceStep returning Success on timestep %d\n", + MetadataMsg->Timestep); + return SstSuccess; + } + CP_verbose(Stream, TraceVerbose, "SstAdvanceStep final return\n"); + return ret; +} + +// SstAdvanceStep is only called by the main program thread. +extern SstStatusValue SstAdvanceStep(SstStream Stream, const float timeout_sec) +{ + + SstStatusValue result; + STREAM_MUTEX_LOCK(Stream); + if (Stream->CurrentMetadata != NULL) + { + if (Stream->CurrentMetadata->FreeBlock) + { + free(Stream->CurrentMetadata->FreeBlock); + } + if (Stream->CurrentMetadata->WriterMetadata) + { + free(Stream->CurrentMetadata->WriterMetadata); + } + free(Stream->CurrentMetadata); + Stream->CurrentMetadata = NULL; + } + + SstStepMode mode = SstNextAvailable; + if (Stream->ConfigParams->AlwaysProvideLatestTimestep) + { + mode = SstLatestAvailable; + } + if (Stream->WriterConfigParams->CPCommPattern == SstCPCommPeer) + { + result = SstAdvanceStepPeer(Stream, mode, timeout_sec); + } + else + { + result = SstAdvanceStepMin(Stream, mode, timeout_sec); + } + if (result == SstSuccess) + { + Stream->Stats.TimestepsConsumed++; + } + STREAM_MUTEX_UNLOCK(Stream); + return result; +} + +// SstReaderClose is only called by the main program thread and +// needs no locking as it only accesses data set by the main thread +extern void SstReaderClose(SstStream Stream) +{ + /* need to have a reader-side shutdown protocol, but for now, just sleep for + * a little while to make sure our release message for the last timestep + * got received */ + struct timeval CloseTime, Diff; + struct _ReaderCloseMsg Msg; + /* wait until each reader rank has done SstReaderClose() */ + SMPI_Barrier(Stream->mpiComm); + gettimeofday(&CloseTime, NULL); + timersub(&CloseTime, &Stream->ValidStartTime, &Diff); + memset(&Msg, 0, 
sizeof(Msg)); + sendOneToEachWriterRank(Stream, Stream->CPInfo->SharedCM->ReaderCloseFormat, + &Msg, &Msg.WSR_Stream); + Stream->Stats.StreamValidTimeSecs = + (double)Diff.tv_usec / 1e6 + Diff.tv_sec; + + if (Stream->CPVerbosityLevel >= (int)SummaryVerbose) + { + DoStreamSummary(Stream); + } + CMusleep(Stream->CPInfo->SharedCM->cm, 100000); + if (Stream->CurrentMetadata != NULL) + { + if (Stream->CurrentMetadata->FreeBlock) + free(Stream->CurrentMetadata->FreeBlock); + if (Stream->CurrentMetadata->WriterMetadata) + free(Stream->CurrentMetadata->WriterMetadata); + free(Stream->CurrentMetadata); + Stream->CurrentMetadata = NULL; + } +} + +// SstWaitForCompletion is only called by the main program thread and +// needs no locking +extern SstStatusValue SstWaitForCompletion(SstStream Stream, void *handle) +{ + if (Stream->ConfigParams->ReaderShortCircuitReads) + return SstSuccess; + if (Stream->DP_Interface->waitForCompletion(&Svcs, handle) != 1) + { + return SstFatalError; + } + else + { + return SstSuccess; + } +} diff --git a/source/adios2/toolkit/sst/cp/cp_writer.c b/source/adios2/toolkit/sst/cp/cp_writer.c index b819fb29c5..2ae0b122fc 100644 --- a/source/adios2/toolkit/sst/cp/cp_writer.c +++ b/source/adios2/toolkit/sst/cp/cp_writer.c @@ -2409,6 +2409,37 @@ extern void SstProvideTimestep(SstStream Stream, SstData LocalMetadata, FreeAttributeData, FreeAttributeClientData); } +extern void SstProvideTimestepMM(SstStream Stream, SstData LocalMetadata, + SstData Data, long Timestep, + DataFreeFunc FreeTimestep, + void *FreeClientData, SstData AttributeData, + DataFreeFunc FreeAttributeData, + void *FreeAttributeClientData, + struct _SstMetaMetaBlock *MMBlocks) +{ + FFSFormatList Formats = NULL; + while (MMBlocks && MMBlocks->BlockData) + { + FFSFormatList New = malloc(sizeof(*New)); + New->FormatServerRep = MMBlocks->BlockData; + New->FormatServerRepLen = MMBlocks->BlockSize; + New->FormatIDRep = MMBlocks->ID; + New->FormatIDRepLen = MMBlocks->IDSize; + New->Next = 
Formats; + Formats = New; + MMBlocks++; + } + SstInternalProvideTimestep(Stream, LocalMetadata, Data, Timestep, Formats, + FreeTimestep, FreeClientData, AttributeData, + FreeAttributeData, FreeAttributeClientData); + while (Formats) + { + FFSFormatList Tmp = Formats->Next; + free(Formats); + Formats = Tmp; + } +} + void queueReaderRegisterMsgAndNotify(SstStream Stream, struct _ReaderRegisterMsg *Req, CMConnection conn) diff --git a/source/adios2/toolkit/sst/sst.h b/source/adios2/toolkit/sst/sst.h index 6f4fb64562..49dea22ecc 100644 --- a/source/adios2/toolkit/sst/sst.h +++ b/source/adios2/toolkit/sst/sst.h @@ -27,8 +27,10 @@ typedef struct _SstStream *SstStream; /* * metadata and typedefs are tentative and may come from ADIOS2 constructors. */ +typedef struct _SstMetaMetaBlock *SstMetaMetaList; typedef struct _SstFullMetadata *SstFullMetadata; typedef struct _SstData *SstData; +typedef struct _SstBlock *SstBlock; typedef enum { @@ -52,7 +54,8 @@ typedef struct _SstParams *SstParams; typedef enum { SstMarshalFFS, - SstMarshalBP + SstMarshalBP, + SstMarshalBP5 } SstMarshalMethod; typedef enum @@ -88,6 +91,11 @@ extern void SstProvideTimestep(SstStream s, SstData LocalMetadata, SstData AttributeData, DataFreeFunc FreeAttribute, void *FreeAttributeClientData); +extern void +SstProvideTimestepMM(SstStream s, SstData LocalMetadata, SstData LocalData, + long Timestep, DataFreeFunc FreeData, void *FreeClientData, + SstData AttributeData, DataFreeFunc FreeAttribute, + void *FreeAttributeClientData, SstMetaMetaList MMBlocks); extern void SstWriterClose(SstStream stream); /* SstWriterDefinitionLock is called once only, on transition from unlock to * locked definitions */ @@ -99,8 +107,11 @@ extern void SstWriterDefinitionLock(SstStream stream, long EffectiveTimestep); extern SstStream SstReaderOpen(const char *filename, SstParams Params, SMPI_Comm comm); extern void SstReaderGetParams(SstStream stream, - SstMarshalMethod *WriterMarshalMethod); + SstMarshalMethod 
*WriterMarshalMethod, + int *WriterIsRowMajor); extern SstFullMetadata SstGetCurMetadata(SstStream stream); +extern SstMetaMetaList SstGetNewMetaMetaData(SstStream stream, long timestep); +extern SstBlock SstGetAttributeData(SstStream stream, long timestep); extern void *SstReadRemoteMemory(SstStream s, int rank, long timestep, size_t offset, size_t length, void *buffer, void *DP_TimestepInfo); diff --git a/source/adios2/toolkit/sst/sst_data.h b/source/adios2/toolkit/sst/sst_data.h index e3490fdc03..a223a84237 100644 --- a/source/adios2/toolkit/sst/sst_data.h +++ b/source/adios2/toolkit/sst/sst_data.h @@ -25,6 +25,14 @@ struct _SstBlock char *BlockData; }; +struct _SstMetaMetaBlock +{ + char *BlockData; + size_t BlockSize; + char *ID; + size_t IDSize; +}; + /* * Struct that represents statistics tracked by SST */ diff --git a/testing/adios2/engine/bp/CMakeLists.txt b/testing/adios2/engine/bp/CMakeLists.txt index 4823746243..970933116a 100644 --- a/testing/adios2/engine/bp/CMakeLists.txt +++ b/testing/adios2/engine/bp/CMakeLists.txt @@ -5,9 +5,11 @@ set(BP3_DIR ${CMAKE_CURRENT_BINARY_DIR}/bp3) set(BP4_DIR ${CMAKE_CURRENT_BINARY_DIR}/bp4) +set(BP5_DIR ${CMAKE_CURRENT_BINARY_DIR}/bp5) set(FS_DIR ${CMAKE_CURRENT_BINARY_DIR}/filestream) file(MAKE_DIRECTORY ${BP3_DIR}) file(MAKE_DIRECTORY ${BP4_DIR}) +file(MAKE_DIRECTORY ${BP5_DIR}) file(MAKE_DIRECTORY ${FS_DIR}) macro(bp3_bp4_gtest_add_tests_helper testname mpi) @@ -17,6 +19,9 @@ macro(bp3_bp4_gtest_add_tests_helper testname mpi) gtest_add_tests_helper(${testname} ${mpi} BP Engine.BP. .BP4 WORKING_DIRECTORY ${BP4_DIR} EXTRA_ARGS "BP4" ) +# gtest_add_tests_helper(${testname} ${mpi} BP Engine.BP. 
.BP5 +# WORKING_DIRECTORY ${BP5_DIR} EXTRA_ARGS "BP5" +# ) endmacro() add_subdirectory(operations) diff --git a/testing/adios2/engine/staging-common/CMakeLists.txt b/testing/adios2/engine/staging-common/CMakeLists.txt index f2902fd894..886211f249 100644 --- a/testing/adios2/engine/staging-common/CMakeLists.txt +++ b/testing/adios2/engine/staging-common/CMakeLists.txt @@ -157,6 +157,7 @@ MutateTestSet( COMM_PEER_SST_TESTS "CommPeer" writer "CPCommPattern=Peer" "${BAS list (REMOVE_ITEM COMM_PEER_SST_TESTS "PreciousTimestep") MutateTestSet( FFS_SST_TESTS "FFS" writer "MarshalMethod=FFS" "${COMM_MIN_SST_TESTS};${COMM_PEER_SST_TESTS}" ) +MutateTestSet( FFS_SST_TESTS "BP5" writer "MarshalMethod=BP5" "${COMM_MIN_SST_TESTS};${COMM_PEER_SST_TESTS}" ) MutateTestSet( BP_SST_TESTS "BP" writer "MarshalMethod=BP" "${COMM_MIN_SST_TESTS};${COMM_PEER_SST_TESTS}" ) set (SST_TESTS "") @@ -232,6 +233,20 @@ if(NOT MSVC) # not on windows endforeach() endif() +# BP5 tests +if(NOT MSVC) # not on windows + set (BP5_TESTS ${ALL_SIMPLE_TESTS}) + # Delayed reader not worth testing on file engines + list (FILTER BP5_TESTS EXCLUDE REGEX "DelayedReader") + # Attribute's not implemented yet for BP5 + list (FILTER BP5_TESTS EXCLUDE REGEX "Attr") + # The nobody-writes-data-in-a-timestep tests don't work for any BP file engine +# list (FILTER BP5_TESTS EXCLUDE REGEX ".*NoData$") + foreach(test ${BP5_TESTS}) + add_common_test(${test} BP5) + endforeach() +endif() + # # Setup streaming tests for BP4 engine diff --git a/testing/adios2/engine/staging-common/run_test.py.gen.in b/testing/adios2/engine/staging-common/run_test.py.gen.in index f3bafae29c..cb77305225 100755 --- a/testing/adios2/engine/staging-common/run_test.py.gen.in +++ b/testing/adios2/engine/staging-common/run_test.py.gen.in @@ -16,6 +16,7 @@ is_file_engine = {"bpfile": True, "bp": True, "bp3": True, "bp4": True, + "bp5": True, "bp4_stream": False, "hdfmixer": True, "dataman": False, From 52f060268ce8e8143a17e8a36952e1672a7af24a Mon Sep 17 
00:00:00 2001 From: Greg Eisenhauer Date: Sun, 23 May 2021 21:08:04 -0400 Subject: [PATCH 2/8] CI changes --- CMakeLists.txt | 3 +- cmake/DetectOptions.cmake | 18 +++++----- source/adios2/CMakeLists.txt | 29 ++++++++++------ source/adios2/core/IO.cpp | 10 +++++- source/adios2/engine/bp5/BP5Engine.cpp | 33 +++++++++++-------- source/adios2/engine/bp5/BP5Engine.h | 1 - source/adios2/engine/bp5/BP5Reader.cpp | 29 ++++------------ source/adios2/engine/bp5/BP5Reader.h | 2 +- source/adios2/engine/bp5/BP5Writer.cpp | 18 ++++------ source/adios2/engine/bp5/BP5Writer.h | 4 ++- source/adios2/engine/sst/SstReader.cpp | 7 +++- source/adios2/engine/sst/SstWriter.cpp | 12 ++++--- source/adios2/engine/sst/SstWriter.h | 1 + .../toolkit/format/bp5/BP5Deserializer.cpp | 9 +++++ .../toolkit/format/bp5/BP5Serializer.cpp | 24 ++++++++++---- .../adios2/toolkit/format/bp5/BP5Serializer.h | 16 ++++++--- source/adios2/toolkit/sst/cp/cp_reader.c | 15 +++++++++ .../engine/staging-common/CMakeLists.txt | 4 +-- thirdparty/CMakeLists.txt | 2 +- 19 files changed, 144 insertions(+), 93 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f5d081dfde..03d9e4ef13 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -129,6 +129,7 @@ adios_option(DataSpaces "Enable support for DATASPACES" AUTO) adios_option(SSC "Enable support for SSC" AUTO) adios_option(Table "Enable support for Table" AUTO) adios_option(SST "Enable support for SST" AUTO) +adios_option(BP5 "Enable support for BP5" AUTO) adios_option(ZeroMQ "Enable support for ZeroMQ" AUTO) adios_option(HDF5 "Enable support for the HDF5 engine" AUTO) adios_option(IME "Enable support for DDN IME transport" AUTO) @@ -150,7 +151,7 @@ if(ADIOS2_HAVE_MPI) endif() set(ADIOS2_CONFIG_OPTS - Blosc BZip2 ZFP SZ MGARD PNG MPI DataMan DAOS Table SSC SST DataSpaces ZeroMQ HDF5 HDF5_VOL IME Python Fortran SysVShMem Profiling Endian_Reverse + Blosc BZip2 ZFP SZ MGARD PNG MPI DataMan DAOS Table SSC SST BP5 DataSpaces ZeroMQ HDF5 HDF5_VOL IME Python Fortran 
SysVShMem Profiling Endian_Reverse ) GenerateADIOSHeaderConfig(${ADIOS2_CONFIG_OPTS}) configure_file( diff --git a/cmake/DetectOptions.cmake b/cmake/DetectOptions.cmake index e27ef0e48a..2df23a8ad3 100644 --- a/cmake/DetectOptions.cmake +++ b/cmake/DetectOptions.cmake @@ -216,9 +216,9 @@ endif() # DataSpaces if(ADIOS2_USE_DataSpaces STREQUAL AUTO) - find_package(DataSpaces 2.1.1) + find_package(DataSpaces 1.8) elseif(ADIOS2_USE_DataSpaces) - find_package(DataSpaces 2.1.1 REQUIRED) + find_package(DataSpaces 1.8 REQUIRED) endif() if(DATASPACES_FOUND) set(ADIOS2_HAVE_DataSpaces TRUE) @@ -293,7 +293,7 @@ if(Python_FOUND) endif() # Even if no python support, we still want the interpreter for tests -if(BUILD_TESTING AND NOT Python_Interpreter_FOUND) +if(NOT Python_Interpreter_FOUND) find_package(Python REQUIRED COMPONENTS Interpreter) endif() @@ -317,7 +317,7 @@ if(Python_Interpreter_FOUND) endif() # Sst -if(ADIOS2_USE_SST AND NOT MSVC) +if(ADIOS2_USE_SST AND NOT WIN32) set(ADIOS2_HAVE_SST TRUE) find_package(LIBFABRIC 1.6) if(LIBFABRIC_FOUND) @@ -329,10 +329,10 @@ if(ADIOS2_USE_SST AND NOT MSVC) endif() endif() -find_package(DAOS) - if(DAOS_FOUND) - set(ADIOS2_HAVE_DAOS TRUE) - endif() +# BP5 +if(ADIOS2_USE_BP5 AND NOT WIN32) + set(ADIOS2_HAVE_BP5 TRUE) +endif() #SysV IPC if(UNIX) @@ -372,7 +372,7 @@ include(CheckTypeRepresentation) #check_float_type_representation(double DOUBLE_TYPE_C) #check_float_type_representation("long double" LONG_DOUBLE_TYPE_C) -if(ADIOS2_HAVE_Fortran) +if(ADIOS2_USE_Fortran) #check_float_type_representation(real REAL_TYPE_Fortran LANGUAGE Fortran) #check_float_type_representation("real(kind=4)" REAL4_TYPE_Fortran LANGUAGE Fortran) #check_float_type_representation("real(kind=8)" REAL8_TYPE_Fortran LANGUAGE Fortran) diff --git a/source/adios2/CMakeLists.txt b/source/adios2/CMakeLists.txt index f96498fcf7..f1a121c789 100644 --- a/source/adios2/CMakeLists.txt +++ b/source/adios2/CMakeLists.txt @@ -56,10 +56,6 @@ add_library(adios2_core 
engine/bp4/BP4Reader.cpp engine/bp4/BP4Reader.tcc engine/bp4/BP4Writer.cpp engine/bp4/BP4Writer.tcc - engine/bp5/BP5Engine.cpp - engine/bp5/BP5Reader.cpp engine/bp5/BP5Reader.tcc - engine/bp5/BP5Writer.cpp engine/bp5/BP5Writer.tcc - engine/skeleton/SkeletonReader.cpp engine/skeleton/SkeletonReader.tcc engine/skeleton/SkeletonWriter.cpp engine/skeleton/SkeletonWriter.tcc @@ -73,7 +69,6 @@ add_library(adios2_core toolkit/format/buffer/Buffer.cpp toolkit/format/buffer/BufferV.cpp toolkit/format/buffer/heap/BufferSTL.cpp - toolkit/format/buffer/ffs/BufferFFS.cpp toolkit/format/bp/BPBase.cpp toolkit/format/bp/BPBase.tcc toolkit/format/bp/BPSerializer.cpp toolkit/format/bp/BPSerializer.tcc @@ -126,7 +121,7 @@ target_include_directories(adios2_core $ $ ) -target_link_libraries(adios2_core PRIVATE adios2sys_interface adios2::thirdparty::pugixml taustubs adios2::thirdparty::yaml-cpp ffs::ffs) +target_link_libraries(adios2_core PRIVATE adios2sys_interface adios2::thirdparty::pugixml taustubs adios2::thirdparty::yaml-cpp) target_link_libraries(adios2_core PUBLIC ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(adios2_core PUBLIC "$") @@ -134,6 +129,24 @@ if(UNIX) target_sources(adios2_core PRIVATE toolkit/transport/file/FilePOSIX.cpp) endif() +if (ADIOS2_HAVE_BP5) + target_sources(adios2_core PRIVATE + engine/bp5/BP5Engine.cpp + engine/bp5/BP5Reader.cpp engine/bp5/BP5Reader.tcc + engine/bp5/BP5Writer.cpp engine/bp5/BP5Writer.tcc + ) +endif() + +if (ADIOS2_HAVE_BP5 OR ADIOS2_HAVE_SST) + target_sources(adios2_core PRIVATE + toolkit/format/buffer/ffs/BufferFFS.cpp + toolkit/format/bp5/BP5Base.cpp + toolkit/format/bp5/BP5Serializer.cpp + toolkit/format/bp5/BP5Deserializer.cpp toolkit/format/bp5/BP5Deserializer.tcc + ) + target_link_libraries(adios2_core PRIVATE ffs::ffs) +endif() + if(ADIOS2_HAVE_DAOS) target_sources(adios2_core PRIVATE toolkit/transport/file/FileDaos.cpp) target_link_libraries(adios2_core PRIVATE DAOS::DAOS) @@ -217,10 +230,6 @@ if(ADIOS2_HAVE_SST) 
add_subdirectory(toolkit/sst) target_sources(adios2_core PRIVATE - toolkit/format/bp5/BP5Base.cpp - toolkit/format/bp5/BP5Serializer.cpp - toolkit/format/bp5/BP5Deserializer.cpp toolkit/format/bp5/BP5Deserializer.tcc - engine/sst/SstReader.cpp engine/sst/SstWriter.cpp engine/sst/SstParamParser.cpp diff --git a/source/adios2/core/IO.cpp b/source/adios2/core/IO.cpp index 41f087e586..ad951f35a8 100644 --- a/source/adios2/core/IO.cpp +++ b/source/adios2/core/IO.cpp @@ -22,8 +22,10 @@ #include "adios2/engine/bp3/BP3Writer.h" #include "adios2/engine/bp4/BP4Reader.h" #include "adios2/engine/bp4/BP4Writer.h" +#ifdef ADIOS2_HAVE_BP5 #include "adios2/engine/bp5/BP5Reader.h" #include "adios2/engine/bp5/BP5Writer.h" +#endif #include "adios2/engine/inline/InlineReader.h" #include "adios2/engine/inline/InlineWriter.h" #include "adios2/engine/null/NullEngine.h" @@ -68,7 +70,13 @@ std::unordered_map Factory = { {"bp4", {IO::MakeEngine, IO::MakeEngine}}, {"bp5", - {IO::MakeEngine, IO::MakeEngine}}, +#ifdef ADIOS2_HAVE_BP5 + {IO::MakeEngine, IO::MakeEngine} +#else + IO::NoEngineEntry("ERROR: this version didn't compile with " + "BP5 library, can't use BP5 engine\n") +#endif + }, {"hdfmixer", #ifdef ADIOS2_HAVE_HDF5 IO_MakeEngine_HDFMixer() diff --git a/source/adios2/engine/bp5/BP5Engine.cpp b/source/adios2/engine/bp5/BP5Engine.cpp index 1fe9364918..7d9706a65c 100644 --- a/source/adios2/engine/bp5/BP5Engine.cpp +++ b/source/adios2/engine/bp5/BP5Engine.cpp @@ -133,8 +133,10 @@ void BP5Engine::ParseParams(IO &io, struct BP5Params &Params) { std::memset(&Params, 0, sizeof(Params)); - auto lf_SetBoolParameter = [&](const std::string key, bool &parameter) { + auto lf_SetBoolParameter = [&](const std::string key, bool &parameter, + bool def) { auto itKey = io.m_Parameters.find(key); + parameter = def; if (itKey != io.m_Parameters.end()) { std::string value = itKey->second; @@ -155,8 +157,10 @@ void BP5Engine::ParseParams(IO &io, struct BP5Params &Params) } } }; - auto lf_SetIntParameter = 
[&](const std::string key, int &parameter) { + auto lf_SetIntParameter = [&](const std::string key, int &parameter, + int def) { auto itKey = io.m_Parameters.find(key); + parameter = def; if (itKey != io.m_Parameters.end()) { parameter = std::stoi(itKey->second); @@ -165,20 +169,21 @@ void BP5Engine::ParseParams(IO &io, struct BP5Params &Params) return false; }; - auto lf_SetStringParameter = [&](const std::string key, - std::string &parameter) { - auto itKey = io.m_Parameters.find(key); - if (itKey != io.m_Parameters.end()) - { - parameter = itKey->second; - return true; - } - return false; - }; + // auto lf_SetStringParameter = [&](const std::string key, + // std::string &parameter, char *def) { + // std::cout << "Set String Param , key = " << key << std::endl; + // auto itKey = io.m_Parameters.find(key); + // if (itKey != io.m_Parameters.end()) + // { + // parameter = (itKey->second).c_str(); + // return true; + // } + // return false; + // }; #define get_params(Param, Type, Typedecl, Default) \ - Params.Param = Default; \ - lf_Set##Type##Parameter(#Param, Params.Param); + std::cout << "GetParam, Param = " << #Param << std::endl; \ + lf_Set##Type##Parameter(#Param, Params.Param, Default); BP5_FOREACH_PARAMETER_TYPE_4ARGS(get_params); #undef get_params }; diff --git a/source/adios2/engine/bp5/BP5Engine.h b/source/adios2/engine/bp5/BP5Engine.h index 5e276d207c..1b6e502a65 100644 --- a/source/adios2/engine/bp5/BP5Engine.h +++ b/source/adios2/engine/bp5/BP5Engine.h @@ -84,7 +84,6 @@ class BP5Engine MACRO(StreamReader, Bool, bool, false) \ MACRO(BurstBufferDrain, Bool, bool, true) \ MACRO(NodeLocal, Bool, bool, false) \ - MACRO(BurstBufferPath, String, std::string, "\"\"") \ MACRO(verbose, Int, int, 0) \ MACRO(CollectiveMetadata, Bool, bool, true) \ MACRO(ReaderShortCircuitReads, Bool, bool, false) diff --git a/source/adios2/engine/bp5/BP5Reader.cpp b/source/adios2/engine/bp5/BP5Reader.cpp index 1e9b917664..2fa4abd812 100644 --- a/source/adios2/engine/bp5/BP5Reader.cpp +++ 
b/source/adios2/engine/bp5/BP5Reader.cpp @@ -33,6 +33,12 @@ BP5Reader::BP5Reader(IO &io, const std::string &name, const Mode mode, Init(); } +BP5Reader::~BP5Reader() +{ + if (m_BP5Deserializer) + delete m_BP5Deserializer; +} + StepStatus BP5Reader::BeginStep(StepMode mode, const float timeoutSeconds) { TAU_SCOPED_TIMER("BP5Reader::BeginStep"); @@ -125,16 +131,9 @@ void BP5Reader::ReadData(const size_t WriterRank, const size_t Timestep, char *Destination) { size_t DataStartPos = m_MetadataIndexTable[Timestep][2]; - std::cout << "DataOffsetsStart in MDatafile is " << DataStartPos - << std::endl; DataStartPos += WriterRank * sizeof(uint64_t); - std::cout << "DataOffsetsStart after addition is " << DataStartPos - << std::endl; size_t DataStart = helper::ReadValue( m_MetadataIndex.m_Buffer, DataStartPos, m_Minifooter.IsLittleEndian); - std::cout << "Data start for timestep " << Timestep << " Rank " - << WriterRank << " is " << std::hex << DataStart << std::dec - << std::endl; // check if subfile is already opened if (m_DataFileManager.m_Transports.count(WriterRank) == 0) { @@ -367,8 +366,6 @@ uint64_t BP5Reader::MetadataExpectedMinFileSize(const std::string &IdxFileName, { size_t cur_idxsize = m_MetadataIndex.m_Buffer.size(); static constexpr size_t m_MinIndexRecordSize = 3 * sizeof(uint64_t); - std::cout << " metadata expected min file size Cur = " << cur_idxsize - << " has header " << hasHeader << std::endl; if ((hasHeader && cur_idxsize < m_IndexHeaderSize + m_MinIndexRecordSize) || cur_idxsize < m_MinIndexRecordSize) { @@ -377,8 +374,6 @@ uint64_t BP5Reader::MetadataExpectedMinFileSize(const std::string &IdxFileName, } uint64_t lastpos = *(uint64_t *)&(m_MetadataIndex.m_Buffer[cur_idxsize - 24]); - std::cout << " metadata expected min file size returning lastpos = " - << lastpos << std::endl; return lastpos; } @@ -493,8 +488,6 @@ void BP5Reader::InitBuffer(const TimePoint &timeoutInstant, // now we are sure the index header has been parsed, first step parsing // 
done - std::cout << "Reader row major " << m_ReaderIsRowMajor << std::endl; - std::cout << "Writer row major " << m_WriterIsRowMajor << std::endl; m_BP5Deserializer = new format::BP5Deserializer( m_WriterCount, m_WriterIsRowMajor, m_ReaderIsRowMajor); m_BP5Deserializer->m_Engine = this; @@ -577,20 +570,15 @@ void BP5Reader::ParseMetadataIndex(format::BufferSTL &bufferSTL, position = m_ColumnMajorFlagPosition; const uint8_t val = helper::ReadValue( buffer, position, m_Minifooter.IsLittleEndian); - std::cout << "Row major char is '" << val << "'" << std::endl; m_WriterIsRowMajor = val == 'n'; // move position to first row position = 64; } - std::cout << "Mini foot vers version " << (int)m_Minifooter.Version - << std::endl; for (uint64_t i = 0; i < m_WriterCount; i++) { m_WriterToFileMap.push_back(helper::ReadValue( buffer, position, m_Minifooter.IsLittleEndian)); - std::cout << "Writer " << i << " wrote to file " << m_WriterToFileMap[i] - << std::endl; } // Read each record now @@ -598,7 +586,6 @@ void BP5Reader::ParseMetadataIndex(format::BufferSTL &bufferSTL, do { std::vector ptrs; - std::cout << "Start Timestep position " << position << std::endl; const uint64_t MetadataPos = helper::ReadValue( buffer, position, m_Minifooter.IsLittleEndian); const uint64_t MetadataSize = helper::ReadValue( @@ -608,15 +595,11 @@ void BP5Reader::ParseMetadataIndex(format::BufferSTL &bufferSTL, ptrs.push_back(MetadataSize); ptrs.push_back(position); m_MetadataIndexTable[currentStep] = ptrs; - std::cout << "Timestep " << currentStep << " has MetadataStart " - << ptrs[0] << " MetadataSize " << ptrs[1] << std::endl; for (uint64_t i = 0; i < m_WriterCount; i++) { size_t DataPosPos = ptrs[2] + sizeof(uint64_t) * i; const uint64_t DataPos = helper::ReadValue( buffer, DataPosPos, m_Minifooter.IsLittleEndian); - std::cout << "Writer " << i << " data starts at " << DataPos - << std::endl; } position += sizeof(uint64_t) * m_WriterCount; diff --git a/source/adios2/engine/bp5/BP5Reader.h 
b/source/adios2/engine/bp5/BP5Reader.h index 916b6e11c4..6cd73b6310 100644 --- a/source/adios2/engine/bp5/BP5Reader.h +++ b/source/adios2/engine/bp5/BP5Reader.h @@ -41,7 +41,7 @@ class BP5Reader : public BP5Engine, public Engine BP5Reader(IO &io, const std::string &name, const Mode mode, helper::Comm comm); - virtual ~BP5Reader() = default; + ~BP5Reader(); StepStatus BeginStep(StepMode mode = StepMode::Read, const float timeoutSeconds = -1.0) final; diff --git a/source/adios2/engine/bp5/BP5Writer.cpp b/source/adios2/engine/bp5/BP5Writer.cpp index 2f1abe3884..0bb535b1c9 100644 --- a/source/adios2/engine/bp5/BP5Writer.cpp +++ b/source/adios2/engine/bp5/BP5Writer.cpp @@ -68,7 +68,8 @@ void BP5Writer::WriteMetaMetadata( } } -uint64_t BP5Writer::WriteMetadata(const std::vector MetaDataBlocks) +uint64_t BP5Writer::WriteMetadata( + const std::vector MetaDataBlocks) { uint64_t MDataTotalSize = 0; uint64_t MetaDataSize = 0; @@ -109,8 +110,8 @@ void BP5Writer::WriteData(format::BufferV *Data) DataSize += DataVec[i].iov_len; i++; } - std::cout << "before update m_DataPos is " << m_DataPos << std::endl; m_DataPos += DataSize; + delete[] DataVec; } void BP5Writer::WriteMetadataFileIndex(uint64_t MetaDataPos, @@ -124,11 +125,6 @@ void BP5Writer::WriteMetadataFileIndex(uint64_t MetaDataPos, buf[0] = MetaDataPos; buf[1] = MetaDataSize; m_FileMetadataIndexManager.WriteFiles((char *)buf, sizeof(buf)); - for (int i = 0; i < DataSizes.size(); i++) - { - std::cout << "Writer data pos rank " << i << " = " << m_WriterDataPos[i] - << std::endl; - } m_FileMetadataIndexManager.WriteFiles((char *)m_WriterDataPos.data(), DataSizes.size() * sizeof(uint64_t)); for (int i = 0; i < DataSizes.size(); i++) @@ -148,8 +144,6 @@ void BP5Writer::EndStep() * AttributeEncodeBuffer and the data encode Vector */ /* the first */ - std::cout << "Endstp, data buffer size = " << TSInfo.DataBuffer->Size() - << std::endl; std::vector MetaBuffer = m_BP5Serializer.CopyMetadataToContiguous( 
TSInfo.NewMetaMetaBlocks, TSInfo.MetaEncodeBuffer, TSInfo.DataBuffer->Size()); @@ -174,7 +168,6 @@ void BP5Writer::EndStep() std::vector DataSizes; auto Metadata = m_BP5Serializer.BreakoutContiguousMetadata( RecvBuffer, RecvCounts, UniqueMetaMetaBlocks, DataSizes); - std::cout << "Data sizes size " << DataSizes.size() << std::endl; WriteMetaMetadata(UniqueMetaMetaBlocks); uint64_t ThisMetaDataPos = m_MetaDataPos; uint64_t ThisMetaDataSize = WriteMetadata(Metadata); @@ -228,7 +221,8 @@ void BP5Writer::InitTransports() m_BBName = m_Name; if (m_WriteToBB) { - m_BBName = m_Parameters.BurstBufferPath + PathSeparator + m_Name; + // m_BBName = m_Parameters.BurstBufferPath + PathSeparator + + // m_Name; } if (m_Aggregator.m_IsConsumer) @@ -439,7 +433,7 @@ void BP5Writer::MakeHeader(format::BufferSTL &b, const std::string fileType, // byte 39: Minor file version const uint8_t subversion = 0; - helper::CopyToBuffer(buffer, position, &version); + helper::CopyToBuffer(buffer, position, &subversion); // bytes 40-43 writer count const uint32_t WriterCount = m_Comm.Size(); diff --git a/source/adios2/engine/bp5/BP5Writer.h b/source/adios2/engine/bp5/BP5Writer.h index cfcec6c43f..6f4070b95f 100644 --- a/source/adios2/engine/bp5/BP5Writer.h +++ b/source/adios2/engine/bp5/BP5Writer.h @@ -16,6 +16,7 @@ #include "adios2/toolkit/aggregator/mpi/MPIChain.h" #include "adios2/toolkit/burstbuffer/FileDrainerSingleThread.h" #include "adios2/toolkit/format/bp5/BP5Serializer.h" +#include "adios2/toolkit/format/buffer/BufferV.h" #include "adios2/toolkit/transportman/TransportMan.h" namespace adios2 @@ -122,7 +123,8 @@ class BP5Writer : public BP5Engine, public core::Engine void WriteMetadataFileIndex(uint64_t MetaDataPos, uint64_t MetaDataSize, std::vector DataSizes); - uint64_t WriteMetadata(const std::vector MetaDataBlocks); + uint64_t + WriteMetadata(const std::vector MetaDataBlocks); void WriteData(format::BufferV *Data); diff --git a/source/adios2/engine/sst/SstReader.cpp 
b/source/adios2/engine/sst/SstReader.cpp index d80512d482..cb858b2f84 100644 --- a/source/adios2/engine/sst/SstReader.cpp +++ b/source/adios2/engine/sst/SstReader.cpp @@ -232,7 +232,12 @@ SstReader::SstReader(IO &io, const std::string &name, const Mode mode, delete[] cstr; } -SstReader::~SstReader() { SstStreamDestroy(m_Input); } +SstReader::~SstReader() +{ + if (m_BP5Deserializer) + delete m_BP5Deserializer; + SstStreamDestroy(m_Input); +} StepStatus SstReader::BeginStep(StepMode Mode, const float timeout_sec) { diff --git a/source/adios2/engine/sst/SstWriter.cpp b/source/adios2/engine/sst/SstWriter.cpp index 9320cfa10a..db2fc82865 100644 --- a/source/adios2/engine/sst/SstWriter.cpp +++ b/source/adios2/engine/sst/SstWriter.cpp @@ -258,10 +258,11 @@ void SstWriter::EndStep() MarshalAttributes(); auto TSInfo = m_BP5Serializer->CloseTimestep(m_WriterStep); auto lf_FreeBlocks = [](void *vBlock) { - BP3DataBlock *BlockToFree = - reinterpret_cast(vBlock); + BP5DataBlock *BlockToFree = + reinterpret_cast(vBlock); // Free data and metadata blocks here. BlockToFree is the newblock // value in the enclosing function. 
+ free(BlockToFree->MetaMetaBlocks); delete BlockToFree; }; @@ -278,10 +279,13 @@ void SstWriter::EndStep() i++; } MetaMetaBlocks[TSInfo.NewMetaMetaBlocks.size()] = {NULL, 0, NULL, 0}; + newblock->MetaMetaBlocks = MetaMetaBlocks; newblock->metadata.DataSize = TSInfo.MetaEncodeBuffer->m_FixedSize; newblock->metadata.block = TSInfo.MetaEncodeBuffer->Data(); - newblock->data.DataSize = TSInfo.DataBuffer->DataVec()[0].iov_len; - newblock->data.block = (char *)TSInfo.DataBuffer->DataVec()[0].iov_base; + format::BufferV::BufferV_iovec iovec = TSInfo.DataBuffer->DataVec(); + newblock->data.DataSize = iovec[0].iov_len; + newblock->data.block = (char *)iovec[0].iov_base; + delete[] iovec; if (TSInfo.AttributeEncodeBuffer) { newblock->attribute_data.DataSize = diff --git a/source/adios2/engine/sst/SstWriter.h b/source/adios2/engine/sst/SstWriter.h index c5191c978e..700477a488 100644 --- a/source/adios2/engine/sst/SstWriter.h +++ b/source/adios2/engine/sst/SstWriter.h @@ -69,6 +69,7 @@ class SstWriter : public Engine _SstData data; _SstData metadata; _SstData attribute_data; + SstMetaMetaList MetaMetaBlocks; }; std::unique_ptr m_BP3Serializer; diff --git a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp index fe460235ea..3bf4710560 100644 --- a/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp +++ b/source/adios2/toolkit/format/bp5/BP5Deserializer.cpp @@ -740,6 +740,10 @@ void BP5Deserializer::FinalizeGets(std::vector Requests) } } } + for (const auto &Req : Requests) + { + free((char *)Req.DestinationAddr); + } PendingRequests.clear(); } @@ -1007,6 +1011,11 @@ BP5Deserializer::~BP5Deserializer() free(tmp); tmp = next; } + for (auto &VarRec : VarByName) + { + free(VarRec.second->VarName); + delete VarRec.second; + } } #define declare_template_instantiation(T) \ diff --git a/source/adios2/toolkit/format/bp5/BP5Serializer.cpp b/source/adios2/toolkit/format/bp5/BP5Serializer.cpp index fab720c2b8..33e142fa19 100644 
--- a/source/adios2/toolkit/format/bp5/BP5Serializer.cpp +++ b/source/adios2/toolkit/format/bp5/BP5Serializer.cpp @@ -33,6 +33,12 @@ BP5Serializer::~BP5Serializer() free_FMfield_list(Info.MetaFields); if (Info.LocalFMContext) free_FMcontext(Info.LocalFMContext); + if (Info.DataFields) + free_FMfield_list(Info.DataFields); + if (Info.AttributeFields) + free_FMfield_list(Info.AttributeFields); + if (Info.AttributeData) + free(Info.AttributeData); if (MetadataBuf) { if (((FFSMetadataInfoStruct *)MetadataBuf)->BitField) @@ -638,9 +644,11 @@ BP5Serializer::TimestepInfo BP5Serializer::CloseTimestep(int timestep) if (NewAttribute && Info.AttributeFields) { MetaMetaInfoBlock Block; + char *tmpName = strdup("Attributes"); FMFormat Format = FMregister_simple_format( - Info.LocalFMContext, strdup("Attributes"), Info.AttributeFields, + Info.LocalFMContext, tmpName, Info.AttributeFields, FMstruct_size_field_list(Info.AttributeFields, sizeof(char *))); + free(tmpName); Info.AttributeFormat = Format; int size; Block.MetaMetaInfo = get_server_rep_FMformat(Format, &size); @@ -698,7 +706,12 @@ BP5Serializer::TimestepInfo BP5Serializer::CloseTimestep(int timestep) MBase->BitField = tmp; NewAttribute = false; - return {Formats, Metadata, AttrData, CurDataBuffer}; + struct TimestepInfo Ret + { + Formats, Metadata, AttrData, CurDataBuffer + }; + CurDataBuffer = NULL; + return Ret; #ifdef NDEF SstInternalProvideTimestep(Stream, &MetaDataRec, &DataRec, Timestep, Formats, FreeTSInfo, TSInfo, &AttributeRec, @@ -739,10 +752,8 @@ std::vector BP5Serializer::CopyMetadataToContiguous( RetSize += 2 * sizeof(RetSize); // sizes RetSize += n.MetaMetaInfoLen + n.MetaMetaIDLen; } - uint64_t AfterNMMBlocks = RetSize; RetSize += sizeof(int64_t); // MencodeLen RetSize += MetaEncodeBuffer->m_FixedSize; - uint64_t AfterNMMBlocks2 = RetSize; RetSize += sizeof(DataSize); Ret.resize(RetSize); @@ -760,20 +771,19 @@ std::vector BP5Serializer::CopyMetadataToContiguous( int64_t MEBSize = 
MetaEncodeBuffer->m_FixedSize; helper::CopyToBuffer(Ret, Position, &MEBSize); - uint64_t MDataPos = Position; helper::CopyToBuffer(Ret, Position, MetaEncodeBuffer->Data(), MetaEncodeBuffer->m_FixedSize); helper::CopyToBuffer(Ret, Position, &DataSize); return Ret; } -std::vector BP5Serializer::BreakoutContiguousMetadata( +std::vector BP5Serializer::BreakoutContiguousMetadata( std::vector *Aggregate, const std::vector Counts, std::vector &UniqueMetaMetaBlocks, std::vector &DataSizes) const { size_t Position = 0; - std::vector MetadataBlocks; + std::vector MetadataBlocks; MetadataBlocks.reserve(Counts.size()); DataSizes.resize(Counts.size()); for (int Rank = 0; Rank < Counts.size(); Rank++) diff --git a/source/adios2/toolkit/format/bp5/BP5Serializer.h b/source/adios2/toolkit/format/bp5/BP5Serializer.h index e7ef620a81..4ff3a8ce01 100644 --- a/source/adios2/toolkit/format/bp5/BP5Serializer.h +++ b/source/adios2/toolkit/format/bp5/BP5Serializer.h @@ -8,8 +8,6 @@ #ifndef ADIOS2_TOOLKIT_FORMAT_BP5_BP5SERIALIZER_H_ #define ADIOS2_TOOLKIT_FORMAT_BP5_BP5SERIALIZER_H_ -#include - #include "BP5Base.h" #include "adios2/core/Attribute.h" #include "adios2/core/IO.h" @@ -34,13 +32,21 @@ class BP5Serializer : virtual public BP5Base BP5Serializer(); ~BP5Serializer(); - typedef struct _TimestepInfo + struct TimestepInfo { std::vector NewMetaMetaBlocks; Buffer *MetaEncodeBuffer; Buffer *AttributeEncodeBuffer; BufferV *DataBuffer; - } TimestepInfo; + + ~TimestepInfo() + { + delete MetaEncodeBuffer; + if (AttributeEncodeBuffer) + delete AttributeEncodeBuffer; + delete DataBuffer; + } + }; typedef struct _MetadataInfo { @@ -67,7 +73,7 @@ class BP5Serializer : virtual public BP5Base const std::vector NewmetaMetaBlocks, const format::Buffer *MetaEncodeBuffer, uint64_t DataSize) const; - std::vector BreakoutContiguousMetadata( + std::vector BreakoutContiguousMetadata( std::vector *Aggregate, const std::vector Counts, std::vector &UniqueMetaMetaBlocks, std::vector &DataSizes) const; diff 
--git a/source/adios2/toolkit/sst/cp/cp_reader.c b/source/adios2/toolkit/sst/cp/cp_reader.c index a32f8813db..277c6fa240 100644 --- a/source/adios2/toolkit/sst/cp/cp_reader.c +++ b/source/adios2/toolkit/sst/cp/cp_reader.c @@ -870,6 +870,7 @@ void AddFormatsToMetaMetaInfo(SstStream Stream, struct _TimestepMetadataMsg *Msg) { FFSFormatList Formats = Msg->Formats; + STREAM_ASSERT_LOCKED(Stream); while (Formats) { Stream->InternalMetaMetaInfo = @@ -2256,6 +2257,20 @@ extern void SstReaderClose(SstStream Stream) free(Stream->CurrentMetadata); Stream->CurrentMetadata = NULL; } + for (int i = 0; i < Stream->InternalMetaMetaCount; i++) + { + free(Stream->InternalMetaMetaInfo[i].ID); + free(Stream->InternalMetaMetaInfo[i].BlockData); + } + free(Stream->InternalMetaMetaInfo); + if (Stream->InternalAttrDataInfo) + { + for (int i = 0; i < Stream->InternalAttrDataCount; i++) + { + free(Stream->InternalAttrDataInfo[i].BlockData); + } + free(Stream->InternalAttrDataInfo); + } } // SstWaitForCompletion is only called by the main program thread and diff --git a/testing/adios2/engine/staging-common/CMakeLists.txt b/testing/adios2/engine/staging-common/CMakeLists.txt index 886211f249..94aa898f4c 100644 --- a/testing/adios2/engine/staging-common/CMakeLists.txt +++ b/testing/adios2/engine/staging-common/CMakeLists.txt @@ -215,7 +215,7 @@ endif() # Setup tests for BP engines # -if(NOT MSVC) # not on windows +if(NOT WIN32) # not on windows set (BP_TESTS ${ALL_SIMPLE_TESTS}) # Delayed reader not worth testing on file engines list (FILTER BP_TESTS EXCLUDE REGEX "DelayedReader") @@ -234,7 +234,7 @@ if(NOT MSVC) # not on windows endif() # BP5 tests -if(NOT MSVC) # not on windows +if(ADIOS2_HAVE_BP5) set (BP5_TESTS ${ALL_SIMPLE_TESTS}) # Delayed reader not worth testing on file engines list (FILTER BP5_TESTS EXCLUDE REGEX "DelayedReader") diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt index 364655d512..6ffbb41ff8 100644 --- a/thirdparty/CMakeLists.txt +++ 
b/thirdparty/CMakeLists.txt @@ -103,7 +103,7 @@ cmake_dependent_option(ADIOS2_USE_EXTERNAL_ENET "NOT ADIOS2_USE_EXTERNAL_EVPATH" OFF ) -if(ADIOS2_HAVE_SST) +if(ADIOS2_HAVE_SST OR ADIOS2_HAVE_BP5) if(NOT ADIOS2_USE_EXTERNAL_EVPATH) if(NOT ADIOS2_USE_EXTERNAL_ATL) add_subdirectory(atl) From 07689f767246c9e3c7eb70474ae7b60656b0e038 Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Sun, 23 May 2021 21:08:37 -0400 Subject: [PATCH 3/8] TSAN --- source/adios2/toolkit/sst/cp/cp_reader.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/adios2/toolkit/sst/cp/cp_reader.c b/source/adios2/toolkit/sst/cp/cp_reader.c index 277c6fa240..9e2cd57436 100644 --- a/source/adios2/toolkit/sst/cp/cp_reader.c +++ b/source/adios2/toolkit/sst/cp/cp_reader.c @@ -2257,6 +2257,7 @@ extern void SstReaderClose(SstStream Stream) free(Stream->CurrentMetadata); Stream->CurrentMetadata = NULL; } + STREAM_MUTEX_LOCK(Stream); for (int i = 0; i < Stream->InternalMetaMetaCount; i++) { free(Stream->InternalMetaMetaInfo[i].ID); @@ -2271,6 +2272,7 @@ extern void SstReaderClose(SstStream Stream) } free(Stream->InternalAttrDataInfo); } + STREAM_MUTEX_UNLOCK(Stream); } // SstWaitForCompletion is only called by the main program thread and From b0b61a00c06a88c6770482071e0359b98cd5a557 Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Sun, 23 May 2021 22:14:08 -0400 Subject: [PATCH 4/8] TSAN --- source/adios2/toolkit/sst/cp/cp_reader.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/adios2/toolkit/sst/cp/cp_reader.c b/source/adios2/toolkit/sst/cp/cp_reader.c index 9e2cd57436..7b1bb01f18 100644 --- a/source/adios2/toolkit/sst/cp/cp_reader.c +++ b/source/adios2/toolkit/sst/cp/cp_reader.c @@ -1448,6 +1448,7 @@ extern SstFullMetadata SstGetCurMetadata(SstStream Stream) extern SstMetaMetaList SstGetNewMetaMetaData(SstStream Stream, long Timestep) { int RetCount = 0; + STREAM_MUTEX_LOCK(Stream); for (int i = 0; i < Stream->InternalMetaMetaCount; i++) { if 
(Stream->InternalMetaMetaInfo[i].TimestepAdded >= Timestep) @@ -1470,6 +1471,7 @@ extern SstMetaMetaList SstGetNewMetaMetaData(SstStream Stream, long Timestep) } } memset(&ret[j], 0, sizeof(ret[j])); + STREAM_MUTEX_UNLOCK(Stream); return ret; } From b83cf26a389ade1c1283c5de9e827218527357b8 Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Sun, 23 May 2021 23:06:54 -0400 Subject: [PATCH 5/8] TSAN --- source/adios2/toolkit/sst/cp/cp_reader.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/adios2/toolkit/sst/cp/cp_reader.c b/source/adios2/toolkit/sst/cp/cp_reader.c index 7b1bb01f18..29e33d47f6 100644 --- a/source/adios2/toolkit/sst/cp/cp_reader.c +++ b/source/adios2/toolkit/sst/cp/cp_reader.c @@ -468,7 +468,6 @@ SstStream SstReaderOpen(const char *Name, SstParams Params, SMPI_Comm comm) Stream = CP_newStream(); Stream->Role = ReaderRole; Stream->mpiComm = comm; - Stream->AttrsRetrieved = 0; SMPI_Comm_rank(Stream->mpiComm, &Stream->Rank); SMPI_Comm_size(Stream->mpiComm, &Stream->CohortSize); @@ -1477,7 +1476,9 @@ extern SstMetaMetaList SstGetNewMetaMetaData(SstStream Stream, long Timestep) extern SstBlock SstGetAttributeData(SstStream Stream, long Timestep) { + STREAM_MUTEX_LOCK(Stream); Stream->AttrsRetrieved = 1; + STREAM_MUTEX_UNLOCK(Stream); return Stream->InternalAttrDataInfo; } From 91f5e9e49a5ae70837b59dea3e330bf845165966 Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Mon, 24 May 2021 07:24:43 -0400 Subject: [PATCH 6/8] TSAN --- source/adios2/toolkit/sst/cp/cp_reader.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/adios2/toolkit/sst/cp/cp_reader.c b/source/adios2/toolkit/sst/cp/cp_reader.c index 29e33d47f6..bcafd49981 100644 --- a/source/adios2/toolkit/sst/cp/cp_reader.c +++ b/source/adios2/toolkit/sst/cp/cp_reader.c @@ -1477,9 +1477,10 @@ extern SstMetaMetaList SstGetNewMetaMetaData(SstStream Stream, long Timestep) extern SstBlock SstGetAttributeData(SstStream Stream, long Timestep) { 
STREAM_MUTEX_LOCK(Stream); + struct _SstBlock *InternalAttrDataInfo = Stream->InternalAttrDataInfo; Stream->AttrsRetrieved = 1; STREAM_MUTEX_UNLOCK(Stream); - return Stream->InternalAttrDataInfo; + return InternalAttrDataInfo; } static void AddToReadStats(SstStream Stream, int Rank, long Timestep, From c01a8511cc3755fefb5fb6ce4e956a73ee4c097c Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Mon, 24 May 2021 16:11:37 -0400 Subject: [PATCH 7/8] Rework mem management in SST/BP5 --- source/adios2/engine/sst/SstWriter.cpp | 24 +++++++++++-------- source/adios2/engine/sst/SstWriter.h | 1 + .../toolkit/format/bp5/BP5Serializer.cpp | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/source/adios2/engine/sst/SstWriter.cpp b/source/adios2/engine/sst/SstWriter.cpp index db2fc82865..9127fb06d8 100644 --- a/source/adios2/engine/sst/SstWriter.cpp +++ b/source/adios2/engine/sst/SstWriter.cpp @@ -256,21 +256,24 @@ void SstWriter::EndStep() else if (Params.MarshalMethod == SstMarshalBP5) { MarshalAttributes(); - auto TSInfo = m_BP5Serializer->CloseTimestep(m_WriterStep); + format::BP5Serializer::TimestepInfo *TSInfo = + new format::BP5Serializer::TimestepInfo( + m_BP5Serializer->CloseTimestep(m_WriterStep)); auto lf_FreeBlocks = [](void *vBlock) { BP5DataBlock *BlockToFree = reinterpret_cast(vBlock); // Free data and metadata blocks here. BlockToFree is the newblock // value in the enclosing function. 
free(BlockToFree->MetaMetaBlocks); + delete BlockToFree->TSInfo; delete BlockToFree; }; BP5DataBlock *newblock = new BP5DataBlock; SstMetaMetaList MetaMetaBlocks = (SstMetaMetaList)malloc( - (TSInfo.NewMetaMetaBlocks.size() + 1) * sizeof(MetaMetaBlocks[0])); + (TSInfo->NewMetaMetaBlocks.size() + 1) * sizeof(MetaMetaBlocks[0])); int i = 0; - for (const auto &MM : TSInfo.NewMetaMetaBlocks) + for (const auto &MM : TSInfo->NewMetaMetaBlocks) { MetaMetaBlocks[i].BlockData = MM.MetaMetaInfo; MetaMetaBlocks[i].BlockSize = MM.MetaMetaInfoLen; @@ -278,20 +281,21 @@ void SstWriter::EndStep() MetaMetaBlocks[i].IDSize = MM.MetaMetaIDLen; i++; } - MetaMetaBlocks[TSInfo.NewMetaMetaBlocks.size()] = {NULL, 0, NULL, 0}; + MetaMetaBlocks[TSInfo->NewMetaMetaBlocks.size()] = {NULL, 0, NULL, 0}; newblock->MetaMetaBlocks = MetaMetaBlocks; - newblock->metadata.DataSize = TSInfo.MetaEncodeBuffer->m_FixedSize; - newblock->metadata.block = TSInfo.MetaEncodeBuffer->Data(); - format::BufferV::BufferV_iovec iovec = TSInfo.DataBuffer->DataVec(); + newblock->metadata.DataSize = TSInfo->MetaEncodeBuffer->m_FixedSize; + newblock->metadata.block = TSInfo->MetaEncodeBuffer->Data(); + format::BufferV::BufferV_iovec iovec = TSInfo->DataBuffer->DataVec(); newblock->data.DataSize = iovec[0].iov_len; newblock->data.block = (char *)iovec[0].iov_base; + newblock->TSInfo = TSInfo; delete[] iovec; - if (TSInfo.AttributeEncodeBuffer) + if (TSInfo->AttributeEncodeBuffer) { newblock->attribute_data.DataSize = - TSInfo.AttributeEncodeBuffer->m_FixedSize; + TSInfo->AttributeEncodeBuffer->m_FixedSize; newblock->attribute_data.block = - TSInfo.AttributeEncodeBuffer->Data(); + TSInfo->AttributeEncodeBuffer->Data(); } else { diff --git a/source/adios2/engine/sst/SstWriter.h b/source/adios2/engine/sst/SstWriter.h index 700477a488..076df32b05 100644 --- a/source/adios2/engine/sst/SstWriter.h +++ b/source/adios2/engine/sst/SstWriter.h @@ -70,6 +70,7 @@ class SstWriter : public Engine _SstData metadata; _SstData 
attribute_data; SstMetaMetaList MetaMetaBlocks; + format::BP5Serializer::TimestepInfo *TSInfo; }; std::unique_ptr m_BP3Serializer; diff --git a/source/adios2/toolkit/format/bp5/BP5Serializer.cpp b/source/adios2/toolkit/format/bp5/BP5Serializer.cpp index 33e142fa19..20cc7ca4e2 100644 --- a/source/adios2/toolkit/format/bp5/BP5Serializer.cpp +++ b/source/adios2/toolkit/format/bp5/BP5Serializer.cpp @@ -475,7 +475,7 @@ void BP5Serializer::Marshal(void *Variable, const char *Name, CurDataBuffer = new BufferV("data buffer"); } DataOffset = - CurDataBuffer->AddToVec(ElemCount * ElemSize, Data, ElemSize, Sync); + CurDataBuffer->AddToVec(ElemCount * ElemSize, Data, ElemSize, true); if (!AlreadyWritten) { From 03f365a157df1443587a5bcce4590a3b6630f348 Mon Sep 17 00:00:00 2001 From: Greg Eisenhauer Date: Mon, 24 May 2021 20:08:49 -0400 Subject: [PATCH 8/8] Cleanup --- cmake/DetectOptions.cmake | 14 ++++++++++---- source/adios2/CMakeLists.txt | 1 - source/adios2/engine/bp5/BP5Engine.cpp | 1 - 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/cmake/DetectOptions.cmake b/cmake/DetectOptions.cmake index 2df23a8ad3..0be44b4ae6 100644 --- a/cmake/DetectOptions.cmake +++ b/cmake/DetectOptions.cmake @@ -216,9 +216,9 @@ endif() # DataSpaces if(ADIOS2_USE_DataSpaces STREQUAL AUTO) - find_package(DataSpaces 1.8) + find_package(DataSpaces 2.1.1) elseif(ADIOS2_USE_DataSpaces) - find_package(DataSpaces 1.8 REQUIRED) + find_package(DataSpaces 2.1.1 REQUIRED) endif() if(DATASPACES_FOUND) set(ADIOS2_HAVE_DataSpaces TRUE) @@ -293,7 +293,7 @@ if(Python_FOUND) endif() # Even if no python support, we still want the interpreter for tests -if(NOT Python_Interpreter_FOUND) +if(BUILD_TESTING AND NOT Python_Interpreter_FOUND) find_package(Python REQUIRED COMPONENTS Interpreter) endif() @@ -329,6 +329,12 @@ if(ADIOS2_USE_SST AND NOT WIN32) endif() endif() +# DAOS +find_package(DAOS) +if(DAOS_FOUND) + set(ADIOS2_HAVE_DAOS TRUE) +endif() + # BP5 if(ADIOS2_USE_BP5 AND NOT WIN32) 
set(ADIOS2_HAVE_BP5 TRUE) @@ -372,7 +378,7 @@ include(CheckTypeRepresentation) #check_float_type_representation(double DOUBLE_TYPE_C) #check_float_type_representation("long double" LONG_DOUBLE_TYPE_C) -if(ADIOS2_USE_Fortran) +if(ADIOS2_HAVE_Fortran) #check_float_type_representation(real REAL_TYPE_Fortran LANGUAGE Fortran) #check_float_type_representation("real(kind=4)" REAL4_TYPE_Fortran LANGUAGE Fortran) #check_float_type_representation("real(kind=8)" REAL8_TYPE_Fortran LANGUAGE Fortran) diff --git a/source/adios2/CMakeLists.txt b/source/adios2/CMakeLists.txt index f1a121c789..5949b6decb 100644 --- a/source/adios2/CMakeLists.txt +++ b/source/adios2/CMakeLists.txt @@ -229,7 +229,6 @@ endif() if(ADIOS2_HAVE_SST) add_subdirectory(toolkit/sst) target_sources(adios2_core PRIVATE - engine/sst/SstReader.cpp engine/sst/SstWriter.cpp engine/sst/SstParamParser.cpp diff --git a/source/adios2/engine/bp5/BP5Engine.cpp b/source/adios2/engine/bp5/BP5Engine.cpp index 7d9706a65c..2e55aa10d7 100644 --- a/source/adios2/engine/bp5/BP5Engine.cpp +++ b/source/adios2/engine/bp5/BP5Engine.cpp @@ -182,7 +182,6 @@ void BP5Engine::ParseParams(IO &io, struct BP5Params &Params) // }; #define get_params(Param, Type, Typedecl, Default) \ - std::cout << "GetParam, Param = " << #Param << std::endl; \ lf_Set##Type##Parameter(#Param, Params.Param, Default); BP5_FOREACH_PARAMETER_TYPE_4ARGS(get_params); #undef get_params