Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add new frame based podio interface #100

Merged
merged 25 commits into from
Jun 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
e9a7982
add new frame based podio interface
hegner Apr 21, 2023
cf46dac
Update k4FWCore/src/PodioDataSvc.cpp
hegner May 3, 2023
970d8bf
allow different event content for different events
hegner May 3, 2023
f3f6928
fix double bookkeeping of read collections
hegner May 3, 2023
dbd41d0
rename metadata frame to configuration_metadata
hegner May 4, 2023
70a46d2
make magic method in DataWrapper private; add proper friend declaration
hegner May 4, 2023
c49bd17
address more PR comments
hegner May 4, 2023
d0b60cb
add explicit podio requirement to CMake
hegner May 4, 2023
5f797e8
Add a failing test case that shows the issue with Frame I/O
tmadlener May 7, 2023
4fcfdb1
move reading/creation of frames to i_setRoot
hegner May 10, 2023
4964a3b
adjust ctest for return code of Gaudi in ScheduledStop
hegner May 12, 2023
db4aac7
enable transient non-PODIO types
hegner May 22, 2023
12a916a
Fix final missing constexpr to make transient store work
tmadlener May 22, 2023
4a8f7ae
add prototype for MetaDataHandle; no R/W mode implementation yet
hegner May 24, 2023
fdbddab
include feedback to MetaDataHandle
hegner May 24, 2023
62d9b69
remove algorithm dependency from MetaDataHandle
hegner May 25, 2023
8f4a3da
make descriptor generation function publicly available
hegner May 25, 2023
166ac85
introduce namespace
hegner May 25, 2023
35e9c8c
Make legacy output string_view safe
tmadlener Jun 2, 2023
cedf8d7
Merge branch 'master' into frame
jmcarcell Jun 21, 2023
544ddbe
Update test/k4FWCoreTest/CMakeLists.txt
jmcarcell Jun 22, 2023
478dbb5
Update k4FWCore/src/PodioDataSvc.cpp
jmcarcell Jun 22, 2023
629e628
Remove FWFloatProducer from the README
jmcarcell Jun 22, 2023
e871631
Remove unneeded TFile include
jmcarcell Jun 22, 2023
e3fb057
silence MetaDataHandle warnings during genconf step
hegner Jun 23, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
12 changes: 3 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# k4FWCore (Key4HEP FrameWork Core)


k4FWCore is a Gaudi package that provides the PodioDataService, that allows to use podio-based event data models like edm4hep in gaudi workflows.
# k4FWCore (key4hep FrameWork Core)

k4FWCore is a Gaudi package that provides the PodioDataService, which makes it possible to
use podio-based event data models like EDM4hep in Gaudi workflows.

## Components

Expand All @@ -12,7 +11,6 @@ k4FWCore is a Gaudi package that provides the PodioDataService, that allows to

Component wrapping the PodioDataService to handle PODIO types and collections.


#### PodioInput

Algorithm to read data from input file(s) on disk.
Expand All @@ -21,10 +19,6 @@ Algorithm to read data from input file(s) on disk.

Algorithm to write data to output file on disk.

#### FWFloatProducer

Algorithm with a simple float as output, for testing and adding parameters to jobs.


## Dependencies

Expand Down
8 changes: 2 additions & 6 deletions k4FWCore/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
# Package: k4FWCore
################################################################################

find_package(podio)
find_package(podio 0.16.3 REQUIRED)

gaudi_install(SCRIPTS)


gaudi_add_library(k4FWCore
SOURCES src/PodioDataSvc.cpp
src/PodioLegacyDataSvc.cpp
src/KeepDropSwitch.cpp
LINK Gaudi::GaudiAlgLib Gaudi::GaudiKernel podio::podioRootIO ROOT::Core ROOT::RIO ROOT::Tree
)
Expand All @@ -30,8 +31,3 @@ install(TARGETS k4FWCore k4FWCorePlugins
LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT shlib
COMPONENT dev)


add_test(NAME WriteTest
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_BINARY_DIR}/run ${PROJECT_SOURCE_DIR}/k4FWCore/scripts/k4run k4FWCore/test/options/simple_write.py)

20 changes: 7 additions & 13 deletions k4FWCore/components/PodioInput.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,22 @@ StatusCode PodioInput::initialize() {
if (nullptr == m_podioDataSvc)
return StatusCode::FAILURE;

auto idTable = m_podioDataSvc->getCollectionIDs();
for (auto& name : m_collectionNames) {
debug() << "Finding collection " << name << " in collection registry." << endmsg;
if (!idTable->present(name)) {
error() << "Requested product " << name << " not found." << endmsg;
return StatusCode::FAILURE;
}
m_collectionIDs.push_back(idTable->collectionID(name));
}
// TODO: add an upfront check for existence of data products

return StatusCode::SUCCESS;
}

StatusCode PodioInput::execute() {
size_t cntr = 0;
// Re-create the collections from ROOT file
for (auto& id : m_collectionIDs) {
const std::string& collName = m_collectionNames.value().at(cntr++);
debug() << "Registering collection to read " << collName << " with id " << id << endmsg;
if (m_podioDataSvc->readCollection(collName, id).isFailure()) {

for (auto& collName : m_collectionNames) {
debug() << "Registering collection to read " << collName << endmsg;
if (m_podioDataSvc->readCollection(collName).isFailure()) {
return StatusCode::FAILURE;
}
}

// Tell data service that we are done with requested collections
m_podioDataSvc->endOfRead();
return StatusCode::SUCCESS;
Expand Down
2 changes: 0 additions & 2 deletions k4FWCore/components/PodioInput.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ class PodioInput : public GaudiAlgorithm {
private:
/// Name of collections to read. Set by option collections (this is temporary)
Gaudi::Property<std::vector<std::string>> m_collectionNames{this, "collections", {}, "Places of collections to read"};
/// Collection IDs (retrieved with CollectionIDTable from ROOT file, using collection names)
std::vector<int> m_collectionIDs;
/// Data service: needed to register objects and get collection IDs. Just an observing pointer.
PodioDataSvc* m_podioDataSvc;
};
Expand Down
53 changes: 53 additions & 0 deletions k4FWCore/components/PodioLegacyInput.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#include "PodioLegacyInput.h"

#include "TFile.h"
#include "TROOT.h"

#include "k4FWCore/DataWrapper.h"
#include "k4FWCore/PodioLegacyDataSvc.h"

// Gaudi component declaration for this algorithm.
DECLARE_COMPONENT(PodioLegacyInput)

/// Constructor: only forwards the instance name and the service locator to the GaudiAlgorithm base.
PodioLegacyInput::PodioLegacyInput(const std::string& name, ISvcLocator* svcLoc)
    : GaudiAlgorithm(name, svcLoc) {
}

/// Initialize the algorithm: acquire the PodioLegacyDataSvc and resolve the
/// collection ID of every collection listed in the "collections" property.
///
/// @return StatusCode::FAILURE if the base class fails to initialize, if the
///         event data service is not a PodioLegacyDataSvc, or if a requested
///         collection is not present in the collection ID table;
///         StatusCode::SUCCESS otherwise.
StatusCode PodioLegacyInput::initialize() {
  if (GaudiAlgorithm::initialize().isFailure())
    return StatusCode::FAILURE;

  // The event data service has to be a PodioLegacyDataSvc
  m_podioLegacyDataSvc = dynamic_cast<PodioLegacyDataSvc*>(evtSvc().get());
  if (nullptr == m_podioLegacyDataSvc) {
    // Same diagnostic as PodioLegacyOutput::initialize, instead of failing silently
    error() << "Could not get DataSvc!" << endmsg;
    return StatusCode::FAILURE;
  }

  // execute() pairs m_collectionIDs[i] with m_collectionNames[i]. Start from an
  // empty list so both stay aligned even if initialize() runs more than once.
  const auto& requestedNames = m_collectionNames.value();
  m_collectionIDs.clear();
  m_collectionIDs.reserve(requestedNames.size());

  auto idTable = m_podioLegacyDataSvc->getCollectionIDs();
  for (const auto& name : requestedNames) {
    debug() << "Finding collection " << name << " in collection registry." << endmsg;
    if (!idTable->present(name)) {
      error() << "Requested product " << name << " not found." << endmsg;
      return StatusCode::FAILURE;
    }
    m_collectionIDs.push_back(idTable->collectionID(name));
  }
  return StatusCode::SUCCESS;
}

/// Ask the data service to read every requested collection for the current event.
///
/// Names and IDs are paired by position: the i-th ID resolved in initialize()
/// belongs to the i-th entry of the "collections" property.
///
/// @return StatusCode::FAILURE as soon as one collection cannot be read,
///         StatusCode::SUCCESS otherwise.
StatusCode PodioLegacyInput::execute() {
  const auto& requestedNames = m_collectionNames.value();

  // Re-create the collections from ROOT file
  for (size_t idx = 0; idx < m_collectionIDs.size(); ++idx) {
    const std::string& collName = requestedNames.at(idx);
    auto&              collID   = m_collectionIDs[idx];
    debug() << "Registering collection to read " << collName << " with id " << collID << endmsg;

    const bool readFailed = m_podioLegacyDataSvc->readCollection(collName, collID).isFailure();
    if (readFailed) {
      return StatusCode::FAILURE;
    }
  }

  // Tell data service that we are done with requested collections
  m_podioLegacyDataSvc->endOfRead();
  return StatusCode::SUCCESS;
}

/// Finalize the algorithm. Nothing is released here; the result only reflects
/// whether the GaudiAlgorithm base class finalized successfully.
StatusCode PodioLegacyInput::finalize() {
  const bool baseFailed = GaudiAlgorithm::finalize().isFailure();
  if (baseFailed) {
    return StatusCode::FAILURE;
  }
  return StatusCode::SUCCESS;
}
41 changes: 41 additions & 0 deletions k4FWCore/components/PodioLegacyInput.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#ifndef FWCORE_PODIOLEGACYINPUT_H
#define FWCORE_PODIOLEGACYINPUT_H
// Gaudi
#include "GaudiAlg/GaudiAlgorithm.h"

// STL
#include <string>
#include <vector>

// forward declarations
// from k4FWCore:
class PodioLegacyDataSvc;

/** @class PodioLegacyInput
 *
 *  Algorithm that asks the PodioLegacyDataSvc to read a configurable list of
 *  collections for every event.
 *
 *  NOTE(review): presumably meant for ROOT files written with PodioLegacyOutput
 *  (the original comment said "PodioOutput") -- confirm.
 *
 *  @author J. Lingemann
 */

class PodioLegacyInput : public GaudiAlgorithm {
public:
  /// Constructor.
  PodioLegacyInput(const std::string& name, ISvcLocator* svcLoc);
  /// Initialization. Acquires the PodioLegacyDataSvc and looks up the ID of every requested collection.
  StatusCode initialize() override;
  /// Execute. Asks the data service to read every requested collection, then signals the end of reading.
  StatusCode execute() override;
  /// Finalize. Only forwards to the base class.
  StatusCode finalize() override;

private:
  /// Name of collections to read. Set by option collections (this is temporary)
  Gaudi::Property<std::vector<std::string>> m_collectionNames{this, "collections", {}, "Places of collections to read"};
  /// Collection IDs (retrieved from the data service's collection ID table, using collection names).
  /// Kept index-aligned with m_collectionNames.
  std::vector<int> m_collectionIDs;
  /// Data service: needed to register objects and get collection IDs. Just an observing pointer,
  /// null until initialize() has succeeded.
  PodioLegacyDataSvc* m_podioLegacyDataSvc{nullptr};
};

#endif
200 changes: 200 additions & 0 deletions k4FWCore/components/PodioLegacyOutput.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
#include <cstdlib>

#include "PodioLegacyOutput.h"
#include "TFile.h"
#include "k4FWCore/PodioLegacyDataSvc.h"
#include "rootUtils.h"

DECLARE_COMPONENT(PodioLegacyOutput)

PodioLegacyOutput::PodioLegacyOutput(const std::string& name, ISvcLocator* svcLoc)
: GaudiAlgorithm(name, svcLoc), m_firstEvent(true) {}

/// Initialize the algorithm: acquire the PodioLegacyDataSvc, open the output
/// file, attach the event data tree to it and create the metadata trees.
///
/// @return StatusCode::FAILURE if the base class fails to initialize, if the
///         event data service is not a PodioLegacyDataSvc, or if the output
///         file cannot be opened; StatusCode::SUCCESS otherwise.
StatusCode PodioLegacyOutput::initialize() {
  if (GaudiAlgorithm::initialize().isFailure())
    return StatusCode::FAILURE;

  // The event data service has to be a PodioLegacyDataSvc
  m_podioLegacyDataSvc = dynamic_cast<PodioLegacyDataSvc*>(evtSvc().get());
  if (nullptr == m_podioLegacyDataSvc) {
    error() << "Could not get DataSvc!" << endmsg;
    return StatusCode::FAILURE;
  }

  m_file = std::unique_ptr<TFile>(TFile::Open(m_filename.value().c_str(), "RECREATE", "data file"));
  // TFile::Open signals failure with a null pointer or a zombie file. Stop here
  // rather than dereferencing an unusable file in finalize().
  if (!m_file || m_file->IsZombie()) {
    error() << "Could not open output file " << m_filename.value() << endmsg;
    return StatusCode::FAILURE;
  }

  // The event data tree comes from the data service and is attached to the output file here
  m_datatree = m_podioLegacyDataSvc->eventDataTree();
  m_datatree->SetDirectory(m_file.get());
  // The metadata trees are created after the file has been opened
  m_metadatatree = new TTree("metadata", "Metadata tree");
  m_runMDtree    = new TTree("run_metadata", "Run metadata tree");
  m_evtMDtree    = new TTree("evt_metadata", "Event metadata tree");
  m_colMDtree    = new TTree("col_metadata", "Collection metadata tree");

  m_evtMDtree->Branch("evtMD", "GenericParameters", m_podioLegacyDataSvc->getProvider().eventMetaDataPtr());
  m_switch = KeepDropSwitch(m_outputCommands);
  return StatusCode::SUCCESS;
}

/// Re-attach the already existing branches of the event data tree to the
/// buffers of the given collections and prepare every collection for writing.
///
/// Only collections accepted by the keep/drop switch are re-attached;
/// prepareForWrite() is called on every collection regardless of the switch.
///
/// @param collections (name, collection) pairs to reconnect.
void PodioLegacyOutput::resetBranches(const std::vector<std::pair<std::string, podio::CollectionBase*>>& collections) {
  for (const auto& entry : collections) {
    const std::string&           branchName = entry.first;
    podio::CollectionBase* const collection = entry.second;

    if (m_switch.isOn(branchName)) {
      // Reconnect branches and collections
      const auto buffers = collection->getBuffers();
      m_datatree->SetBranchAddress(branchName.c_str(), buffers.data);

      // Relation branches are named "<collection>#<index>".
      // NOTE(review): the branch lookup result is used unchecked; this assumes the
      // branch was created for the first event -- confirm.
      auto relations = buffers.references;
      if (relations != nullptr) {
        const size_t nRelations = relations->size();
        for (size_t idx = 0; idx < nRelations; ++idx) {
          const std::string relBranchName = branchName + "#" + std::to_string(idx);
          auto              relBranch     = m_datatree->GetBranch(relBranchName.c_str());
          relBranch->SetAddress(&(*relations)[idx]);
        }
      }

      // Vector member branches are named "<collection>_<index>"
      auto vectorMembers = buffers.vectorMembers;
      if (vectorMembers != nullptr) {
        int memberIdx = 0;
        for (auto& member : (*vectorMembers)) {
          void*             address      = member.second;
          const std::string vmBranchName = branchName + "_" + std::to_string(memberIdx);
          m_datatree->SetBranchAddress(vmBranchName.c_str(), address);
          ++memberIdx;
        }
      }
    }

    collection->prepareForWrite();
  }
}

/// Create the branches of the event data tree for the given collections and
/// register the per-collection type information in the metadata tree.
///
/// Data, relation and vector member branches are only created for collections
/// accepted by the keep/drop switch. The type information entry and the call to
/// prepareForWrite() happen for every collection.
///
/// @param collections (name, collection) pairs to create branches for.
void PodioLegacyOutput::createBranches(const std::vector<std::pair<std::string, podio::CollectionBase*>>& collections) {
  // collectionID, collection type, subset collection
  // Deliberately heap-allocated and not freed here: the metadata tree is handed
  // the address of this vector below and is only filled in finalize(), so the
  // vector has to outlive this function.
  auto* collectionInfo = new std::vector<std::tuple<int, std::string, bool>>();
  collectionInfo->reserve(collections.size());

  for (const auto& collNamePair : collections) {
    const auto&       collName = collNamePair.first;
    auto*             coll     = collNamePair.second;
    const std::string className(coll->getValueTypeName());
    const std::string collClassName = "vector<" + className + "Data>";
    int               isOn          = 0;
    if (m_switch.isOn(collName)) {
      isOn                   = 1;
      const auto collBuffers = coll->getBuffers();
      m_datatree->Branch(collName.c_str(), collClassName.c_str(), collBuffers.data);

      // Create branches for collections holding relations
      if (auto refColls = collBuffers.references) {
        int i = 0;
        for (auto& c : (*refColls)) {
          const auto brName = podio::root_utils::refBranch(collName, i);
          m_datatree->Branch(brName.c_str(), c.get());
          ++i;
        }
      }

      // ---- vector members
      // Handled independently of the relations, exactly as resetBranches() does
      // when it re-attaches these branches for later events.
      if (auto vminfo = collBuffers.vectorMembers) {
        int i = 0;
        for (auto& c : (*vminfo)) {
          const std::string typeName = "vector<" + c.first + ">";
          void*             add      = c.second;
          m_datatree->Branch((collName + "_" + std::to_string(i)).c_str(), typeName.c_str(), add);
          ++i;
        }
      }
    }

    // No check necessary, only registered collections possible
    const auto collID = m_podioLegacyDataSvc->getCollectionIDs()->collectionID(collName);
    collectionInfo->emplace_back(collID, className + "Collection", coll->isSubsetCollection());

    debug() << isOn << " Registering collection " << collClassName << " " << collName.c_str() << " containing type "
            << className << endmsg;
    coll->prepareForWrite();
  }

  m_metadatatree->Branch("CollectionTypeInfo", collectionInfo);
}

/// Write the current event: create the branches for the first event, re-attach
/// them for every later one, then fill the event data and event metadata trees.
///
/// @return always StatusCode::SUCCESS.
StatusCode PodioLegacyOutput::execute() {
  // for now assume identical content for every event
  if (!m_firstEvent) {
    // branches already exist, only the buffer addresses have to be refreshed
    resetBranches(m_podioLegacyDataSvc->getCollections());
  } else {
    // register for writing
    createBranches(m_podioLegacyDataSvc->getCollections());
    m_firstEvent = false;
  }

  debug() << "Filling DataTree .." << endmsg;
  m_datatree->Fill();
  m_evtMDtree->Fill();

  return StatusCode::SUCCESS;
}

/** PodioLegacyOutput::finalize
 * has to happen after all algorithms that touch the data store finish.
 * Here the job options are retrieved and stored to disk as a branch
 * in the metadata tree. Afterwards the metadata trees are filled, everything
 * is written to the output file, the file is closed and, if a remote file name
 * is configured, copied there.
 *
 * @return StatusCode::FAILURE if the base class fails to finalize or if no
 *         output file is available; StatusCode::SUCCESS otherwise. A failed
 *         copy to the remote location is reported as an error but does not
 *         change the return value.
 */
StatusCode PodioLegacyOutput::finalize() {
  if (GaudiAlgorithm::finalize().isFailure())
    return StatusCode::FAILURE;
  // Without an output file there is nothing to write to; do not dereference a null pointer below
  if (!m_file) {
    error() << "No output file available for " << m_filename.value() << ", nothing written." << endmsg;
    return StatusCode::FAILURE;
  }
  //// prepare job options metadata ///////////////////////
  // retrieve the configuration of the job
  // and write it to file as vector of strings
  std::vector<std::string> config_data;
  const auto&              jobOptionsSvc         = Gaudi::svcLocator()->getOptsSvc();
  const auto&              configured_properties = jobOptionsSvc.items();
  for (const auto& per_property : configured_properties) {
    std::stringstream config_stream;
    // sample output:
    // HepMCToEDMConverter.genparticles = "GenParticles";
    // Note that quotes are added to all property values,
    // which leads to problems with ints, lists, dicts and bools.
    // For theses types, the quotes must be removed in postprocessing.
    config_stream << std::get<0>(per_property) << " = \"" << std::get<1>(per_property) << "\";" << '\n';
    config_data.push_back(config_stream.str());
  }
  // Some default components are not captured by the job option service
  // and have to be traversed like this. Note that Gaudi!577 will improve this.
  for (const auto* name : {"ApplicationMgr", "MessageSvc", "NTupleSvc"}) {
    std::stringstream config_stream;
    auto              svc = service<IProperty>(name);
    if (!svc.isValid())
      continue;
    for (const auto* property : svc->getProperties()) {
      config_stream << name << "." << property->name() << " = \"" << property->toString() << "\";" << '\n';
    }
    config_data.push_back(config_stream.str());
  }
  //// finalize trees and file //////////////////////////////
  m_file->cd();

  // Declared at function scope on purpose: the branch only keeps the address of
  // this string, which therefore has to stay alive until m_metadatatree->Fill().
  std::string s_env_key4hep_stack;
  if (const char* env_key4hep_stack = std::getenv("KEY4HEP_STACK")) {
    s_env_key4hep_stack = env_key4hep_stack;
    m_metadatatree->Branch("key4hepStack", &s_env_key4hep_stack);
  }

  m_metadatatree->Branch("gaudiConfigOptions", &config_data);
  m_metadatatree->Branch("CollectionIDs", m_podioLegacyDataSvc->getCollectionIDs());

  m_metadatatree->Fill();

  m_colMDtree->Branch("colMD", "std::map<int,podio::GenericParameters>",
                      m_podioLegacyDataSvc->getProvider().getColMetaDataMap());
  m_colMDtree->Fill();
  m_runMDtree->Branch("runMD", "std::map<int,podio::GenericParameters>",
                      m_podioLegacyDataSvc->getProvider().getRunMetaDataMap());
  m_runMDtree->Fill();

  m_datatree->Write();
  m_file->Write();
  m_file->Close();

  // Optional copy of the finished file to a remote location
  const bool copyRequested = !m_filenameRemote.value().empty();
  const bool copied =
      copyRequested && TFile::Cp(m_filename.value().c_str(), m_filenameRemote.value().c_str(), false);

  // Always terminate the message with endmsg, also when no remote copy was requested
  info() << "Data written to: " << m_filename.value();
  if (copied) {
    info() << " and copied to: " << m_filenameRemote.value();
  }
  info() << endmsg;

  if (copyRequested && !copied) {
    error() << "Could not copy " << m_filename.value() << " to " << m_filenameRemote.value() << endmsg;
  }
  return StatusCode::SUCCESS;
}