Skip to content

Commit

Permalink
add new frame based podio interface (#100)
Browse files Browse the repository at this point in the history
  • Loading branch information
hegner committed Jun 26, 2023
1 parent 0741d91 commit 3d69b33
Show file tree
Hide file tree
Showing 37 changed files with 1,085 additions and 390 deletions.
12 changes: 3 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
# k4FWCore (Key4HEP FrameWork Core)


k4FWCore is a Gaudi package that provides the PodioDataService, that allows to use podio-based event data models like edm4hep in gaudi workflows.
# k4FWCore (key4hep FrameWork Core)

k4FWCore is a Gaudi package that provides the PodioDataService, which allows
using podio-based event data models like EDM4hep in Gaudi workflows.

## Components

Expand All @@ -12,7 +11,6 @@ k4FWCore is a Gaudi package that provides the PodioDataService, that allows to

Component wrapping the PodioDataService to handle PODIO types and collections.


#### PodioInput

Algorithm to read data from input file(s) on disk.
Expand All @@ -21,10 +19,6 @@ Algorithm to read data from input file(s) on disk.

Algorithm to write data to output file on disk.

#### FWFloatProducer

Algorithm with a simple float as output, for testing and adding parameters to jobs.


## Dependencies

Expand Down
8 changes: 2 additions & 6 deletions k4FWCore/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@
# Package: k4FWCore
################################################################################

find_package(podio)
find_package(podio 0.16.3 REQUIRED)

gaudi_install(SCRIPTS)


gaudi_add_library(k4FWCore
SOURCES src/PodioDataSvc.cpp
src/PodioLegacyDataSvc.cpp
src/KeepDropSwitch.cpp
LINK Gaudi::GaudiAlgLib Gaudi::GaudiKernel podio::podioRootIO ROOT::Core ROOT::RIO ROOT::Tree
)
Expand All @@ -30,8 +31,3 @@ install(TARGETS k4FWCore k4FWCorePlugins
LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" COMPONENT shlib
COMPONENT dev)


add_test(NAME WriteTest
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND ${CMAKE_BINARY_DIR}/run ${PROJECT_SOURCE_DIR}/k4FWCore/scripts/k4run k4FWCore/test/options/simple_write.py)

20 changes: 7 additions & 13 deletions k4FWCore/components/PodioInput.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,22 @@ StatusCode PodioInput::initialize() {
if (nullptr == m_podioDataSvc)
return StatusCode::FAILURE;

auto idTable = m_podioDataSvc->getCollectionIDs();
for (auto& name : m_collectionNames) {
debug() << "Finding collection " << name << " in collection registry." << endmsg;
if (!idTable->present(name)) {
error() << "Requested product " << name << " not found." << endmsg;
return StatusCode::FAILURE;
}
m_collectionIDs.push_back(idTable->collectionID(name));
}
// TODO: add an upfront check for existence of data products

return StatusCode::SUCCESS;
}

StatusCode PodioInput::execute() {
size_t cntr = 0;
// Re-create the collections from ROOT file
for (auto& id : m_collectionIDs) {
const std::string& collName = m_collectionNames.value().at(cntr++);
debug() << "Registering collection to read " << collName << " with id " << id << endmsg;
if (m_podioDataSvc->readCollection(collName, id).isFailure()) {

for (auto& collName : m_collectionNames) {
debug() << "Registering collection to read " << collName << endmsg;
if (m_podioDataSvc->readCollection(collName).isFailure()) {
return StatusCode::FAILURE;
}
}

// Tell data service that we are done with requested collections
m_podioDataSvc->endOfRead();
return StatusCode::SUCCESS;
Expand Down
2 changes: 0 additions & 2 deletions k4FWCore/components/PodioInput.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ class PodioInput : public GaudiAlgorithm {
private:
/// Name of collections to read. Set by option collections (this is temporary)
Gaudi::Property<std::vector<std::string>> m_collectionNames{this, "collections", {}, "Places of collections to read"};
/// Collection IDs (retrieved with CollectionIDTable from ROOT file, using collection names)
std::vector<int> m_collectionIDs;
/// Data service: needed to register objects and get collection IDs. Just an observing pointer.
PodioDataSvc* m_podioDataSvc;
};
Expand Down
53 changes: 53 additions & 0 deletions k4FWCore/components/PodioLegacyInput.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#include "PodioLegacyInput.h"

#include "TFile.h"
#include "TROOT.h"

#include "k4FWCore/DataWrapper.h"
#include "k4FWCore/PodioLegacyDataSvc.h"

// Declare the algorithm as a Gaudi component so it can be instantiated by name.
DECLARE_COMPONENT(PodioLegacyInput)

/// Constructor. Only forwards the instance name and the service locator to
/// the GaudiAlgorithm base class.
PodioLegacyInput::PodioLegacyInput(const std::string& name, ISvcLocator* svcLoc)
    : GaudiAlgorithm(name, svcLoc) {
}

/// Initialize the algorithm.
///
/// Initializes the GaudiAlgorithm base, obtains the event data service as a
/// PodioLegacyDataSvc and translates every name listed in the "collections"
/// property into a collection ID using the service's collection ID table.
///
/// @return StatusCode::FAILURE if the base class fails to initialize, the
///         event data service is not a PodioLegacyDataSvc, no collection ID
///         table is available, or a requested collection is not present in
///         the table; StatusCode::SUCCESS otherwise.
StatusCode PodioLegacyInput::initialize() {
  if (GaudiAlgorithm::initialize().isFailure()) {
    return StatusCode::FAILURE;
  }

  // The configured event data service has to be a PodioLegacyDataSvc.
  m_podioLegacyDataSvc = dynamic_cast<PodioLegacyDataSvc*>(evtSvc().get());
  if (nullptr == m_podioLegacyDataSvc) {
    error() << "Could not get DataSvc!" << endmsg;
    return StatusCode::FAILURE;
  }

  auto idTable = m_podioLegacyDataSvc->getCollectionIDs();
  if (!idTable) {
    error() << "No collection ID table available from the data service." << endmsg;
    return StatusCode::FAILURE;
  }

  // Start from a clean slate so that a repeated initialize() does not leave
  // stale or duplicated IDs behind; execute() pairs IDs and names by index.
  const auto& requestedNames = m_collectionNames.value();
  m_collectionIDs.clear();
  m_collectionIDs.reserve(requestedNames.size());

  for (const auto& name : requestedNames) {
    debug() << "Finding collection " << name << " in collection registry." << endmsg;
    if (!idTable->present(name)) {
      error() << "Requested product " << name << " not found." << endmsg;
      return StatusCode::FAILURE;
    }
    m_collectionIDs.push_back(idTable->collectionID(name));
  }
  return StatusCode::SUCCESS;
}

/// Execute. Asks the data service to read every requested collection and then
/// signals that reading is finished for this event.
///
/// @return StatusCode::FAILURE as soon as reading one collection fails,
///         StatusCode::SUCCESS otherwise.
StatusCode PodioLegacyInput::execute() {
  // IDs and names are stored in the same order, so they are paired by index.
  for (size_t idx = 0; idx < m_collectionIDs.size(); ++idx) {
    const std::string& collName = m_collectionNames.value().at(idx);
    auto& id = m_collectionIDs[idx];
    debug() << "Registering collection to read " << collName << " with id " << id << endmsg;

    const bool readFailed = m_podioLegacyDataSvc->readCollection(collName, id).isFailure();
    if (readFailed) {
      return StatusCode::FAILURE;
    }
  }

  // Let the data service know that all requested collections were handled.
  m_podioLegacyDataSvc->endOfRead();
  return StatusCode::SUCCESS;
}

/// Finalize. Only finalizes the GaudiAlgorithm base class and maps its
/// outcome onto StatusCode::FAILURE / StatusCode::SUCCESS.
StatusCode PodioLegacyInput::finalize() {
  const bool baseFailed = GaudiAlgorithm::finalize().isFailure();
  if (baseFailed) {
    return StatusCode::FAILURE;
  }
  return StatusCode::SUCCESS;
}
41 changes: 41 additions & 0 deletions k4FWCore/components/PodioLegacyInput.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#ifndef FWCORE_PODIOLEGACYINPUT_H
#define FWCORE_PODIOLEGACYINPUT_H
// Gaudi
#include "GaudiAlg/GaudiAlgorithm.h"

// STL
#include <string>
#include <vector>

// forward declarations
// from k4FWCore:
class PodioLegacyDataSvc;

/** @class PodioLegacyInput
 *
 *  Class that allows reading ROOT files written with PodioOutput.
 *  The collections named in the "collections" property are read through the
 *  PodioLegacyDataSvc, which has to be configured as the event data service.
 *
 *  @author J. Lingemann
 */

class PodioLegacyInput : public GaudiAlgorithm {
public:
  /// Constructor.
  PodioLegacyInput(const std::string& name, ISvcLocator* svcLoc);
  /// Initialization. Acquires the data service and resolves the requested collection names to collection IDs.
  StatusCode initialize() override;
  /// Execute. Asks the data service to read the collections that are specified to be read.
  StatusCode execute() override;
  /// Finalize. Finalizes the base class.
  StatusCode finalize() override;

private:
  /// Name of collections to read. Set by option collections (this is temporary)
  Gaudi::Property<std::vector<std::string>> m_collectionNames{this, "collections", {}, "Places of collections to read"};
  /// Collection IDs (retrieved with CollectionIDTable from ROOT file, using collection names).
  /// Stored in the same order as m_collectionNames.
  std::vector<int> m_collectionIDs;
  /// Data service: needed to register objects and get collection IDs. Just an observing pointer.
  /// Null until initialize() has succeeded.
  PodioLegacyDataSvc* m_podioLegacyDataSvc = nullptr;
};

#endif
200 changes: 200 additions & 0 deletions k4FWCore/components/PodioLegacyOutput.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
#include <cstdlib>

#include "PodioLegacyOutput.h"
#include "TFile.h"
#include "k4FWCore/PodioLegacyDataSvc.h"
#include "rootUtils.h"

DECLARE_COMPONENT(PodioLegacyOutput)

PodioLegacyOutput::PodioLegacyOutput(const std::string& name, ISvcLocator* svcLoc)
: GaudiAlgorithm(name, svcLoc), m_firstEvent(true) {}

/// Initialize the algorithm.
///
/// Acquires the PodioLegacyDataSvc, opens the output ROOT file (recreating
/// it), attaches the event data tree of the data service to that file,
/// creates the metadata trees and sets up the keep/drop switch from the
/// configured output commands.
///
/// @return StatusCode::FAILURE if the base class fails to initialize, the
///         event data service is not a PodioLegacyDataSvc, the output file
///         cannot be opened, or the data service provides no event data
///         tree; StatusCode::SUCCESS otherwise.
StatusCode PodioLegacyOutput::initialize() {
  if (GaudiAlgorithm::initialize().isFailure()) {
    return StatusCode::FAILURE;
  }

  // The configured event data service has to be a PodioLegacyDataSvc.
  m_podioLegacyDataSvc = dynamic_cast<PodioLegacyDataSvc*>(evtSvc().get());
  if (nullptr == m_podioLegacyDataSvc) {
    error() << "Could not get DataSvc!" << endmsg;
    return StatusCode::FAILURE;
  }

  // TFile::Open signals failure with a null pointer (or a zombie file); using
  // such a file below or in finalize() would crash, so bail out early.
  m_file = std::unique_ptr<TFile>(TFile::Open(m_filename.value().c_str(), "RECREATE", "data file"));
  if (!m_file || m_file->IsZombie()) {
    error() << "Could not open output file " << m_filename.value() << endmsg;
    return StatusCode::FAILURE;
  }

  // The event data tree is owned by the data service; it is only attached to
  // the output file here.
  m_datatree = m_podioLegacyDataSvc->eventDataTree();
  if (nullptr == m_datatree) {
    error() << "Data service did not provide an event data tree." << endmsg;
    return StatusCode::FAILURE;
  }
  m_datatree->SetDirectory(m_file.get());

  // Metadata trees. NOTE(review): these are presumably attached to the file
  // opened above through ROOT's current-directory mechanism and therefore
  // owned by it - confirm before adding explicit deletes.
  m_metadatatree = new TTree("metadata", "Metadata tree");
  m_runMDtree    = new TTree("run_metadata", "Run metadata tree");
  m_evtMDtree    = new TTree("evt_metadata", "Event metadata tree");
  m_colMDtree    = new TTree("col_metadata", "Collection metadata tree");

  m_evtMDtree->Branch("evtMD", "GenericParameters", m_podioLegacyDataSvc->getProvider().eventMetaDataPtr());
  m_switch = KeepDropSwitch(m_outputCommands);
  return StatusCode::SUCCESS;
}

/// Reconnect the branches of the event data tree to the buffers of the
/// collections of the current event.
///
/// For every collection that is switched on by the keep/drop switch, the data
/// branch, the reference branches ("<name>#<i>") and the vector member
/// branches ("<name>_<i>") are pointed at the current buffers. Every
/// collection, kept or not, is prepared for writing.
///
/// @param collections (name, collection) pairs of the current event.
void PodioLegacyOutput::resetBranches(const std::vector<std::pair<std::string, podio::CollectionBase*>>& collections) {
  for (const auto& collNamePair : collections) {
    const std::string&     collName = collNamePair.first;
    podio::CollectionBase* coll     = collNamePair.second;

    if (m_switch.isOn(collName)) {
      // Reconnect branches and collections
      const auto collBuffers = coll->getBuffers();
      m_datatree->SetBranchAddress(collName.c_str(), collBuffers.data);

      if (auto refColls = collBuffers.references) {
        for (size_t j = 0; j < refColls->size(); ++j) {
          const std::string brName = collName + "#" + std::to_string(j);
          auto*             branch = m_datatree->GetBranch(brName.c_str());
          // GetBranch returns a null pointer for unknown branches, e.g. when
          // the content differs from the first event; do not dereference it.
          if (branch == nullptr) {
            error() << "Branch " << brName << " not found in the event data tree, cannot reconnect it." << endmsg;
            continue;
          }
          branch->SetAddress(&(*refColls)[j]);
        }
      }

      if (auto vecMembers = collBuffers.vectorMembers) {
        int j = 0;
        for (auto& c : (*vecMembers)) {
          void* add = c.second;
          m_datatree->SetBranchAddress((collName + "_" + std::to_string(j)).c_str(), add);
          ++j;
        }
      }
    }
    coll->prepareForWrite();
  }
}

/// Create the branches of the event data tree for the collections of the
/// first event and record the collection type information in the metadata
/// tree.
///
/// For every collection that is switched on by the keep/drop switch, a data
/// branch, one branch per reference collection and one branch per vector
/// member ("<name>_<i>") are created. The (collection ID, collection type,
/// is-subset) triple is recorded for every collection, kept or not, and every
/// collection is prepared for writing.
///
/// @param collections (name, collection) pairs of the current event.
void PodioLegacyOutput::createBranches(const std::vector<std::pair<std::string, podio::CollectionBase*>>& collections) {
  // collectionID, collection type, subset collection
  // Deliberately heap allocated and not released here: the metadata tree is
  // handed the address of this vector and is only filled in finalize(), so
  // the vector has to outlive this function.
  auto* collectionInfo = new std::vector<std::tuple<int, std::string, bool>>();
  collectionInfo->reserve(collections.size());

  for (const auto& collNamePair : collections) {
    const std::string&     collName = collNamePair.first;
    podio::CollectionBase* coll     = collNamePair.second;
    const std::string      className(coll->getValueTypeName());
    const std::string      collClassName = "vector<" + className + "Data>";

    int isOn = 0;
    if (m_switch.isOn(collName)) {
      isOn                   = 1;
      const auto collBuffers = coll->getBuffers();
      m_datatree->Branch(collName.c_str(), collClassName.c_str(), collBuffers.data);

      // Create branches for collections holding relations
      if (auto refColls = collBuffers.references) {
        int i = 0;
        for (auto& c : (*refColls)) {
          const auto brName = podio::root_utils::refBranch(collName, i);
          m_datatree->Branch(brName.c_str(), c.get());
          ++i;
        }
      }

      // ---- vector members
      // Handled independently of the reference collections, exactly like
      // resetBranches() does when it reconnects these branches.
      if (auto vminfo = collBuffers.vectorMembers) {
        int i = 0;
        for (auto& c : (*vminfo)) {
          const std::string typeName = "vector<" + c.first + ">";
          void*             add      = c.second;
          m_datatree->Branch((collName + "_" + std::to_string(i)).c_str(), typeName.c_str(), add);
          ++i;
        }
      }
    }

    // No check necessary, only registered collections possible
    const auto collID = m_podioLegacyDataSvc->getCollectionIDs()->collectionID(collName);
    collectionInfo->emplace_back(collID, className + "Collection", coll->isSubsetCollection());

    debug() << isOn << " Registering collection " << collClassName << " " << collName << " containing type "
            << className << endmsg;
    coll->prepareForWrite();
  }

  m_metadatatree->Branch("CollectionTypeInfo", collectionInfo);
}

/// Execute. Creates the branches on the first event, reconnects them on every
/// later event, and then fills the event data tree and the event metadata
/// tree.
///
/// @return always StatusCode::SUCCESS.
StatusCode PodioLegacyOutput::execute() {
  // for now assume identical content for every event
  const auto& eventCollections = m_podioLegacyDataSvc->getCollections();

  // register for writing
  if (!m_firstEvent) {
    resetBranches(eventCollections);
  } else {
    createBranches(eventCollections);
  }
  m_firstEvent = false;

  debug() << "Filling DataTree .." << endmsg;
  m_datatree->Fill();
  m_evtMDtree->Fill();

  return StatusCode::SUCCESS;
}

/** PodioLegacyOutput::finalize
 * has to happen after all algorithms that touch the data store finish.
 * Here the job options are retrieved and stored to disk as a branch
 * in the metadata tree. Afterwards the metadata trees are filled, all trees
 * are written, the file is closed and, if a remote file name is configured,
 * the file is copied there.
 *
 * @return StatusCode::FAILURE if the base class fails to finalize, no output
 *         file is open, or the copy to the remote location fails;
 *         StatusCode::SUCCESS otherwise.
 */
StatusCode PodioLegacyOutput::finalize() {
  if (GaudiAlgorithm::finalize().isFailure())
    return StatusCode::FAILURE;

  // Without an open file there is nothing to write to.
  if (!m_file) {
    error() << "No output file is open, no data written." << endmsg;
    return StatusCode::FAILURE;
  }

  //// prepare job options metadata ///////////////////////
  // retrieve the configuration of the job
  // and write it to file as vector of strings
  std::vector<std::string> config_data;
  const auto&              jobOptionsSvc         = Gaudi::svcLocator()->getOptsSvc();
  const auto&              configured_properties = jobOptionsSvc.items();
  for (const auto& per_property : configured_properties) {
    std::stringstream config_stream;
    // sample output:
    // HepMCToEDMConverter.genparticles = "GenParticles";
    // Note that quotes are added to all property values,
    // which leads to problems with ints, lists, dicts and bools.
    // For these types, the quotes must be removed in postprocessing.
    config_stream << std::get<0>(per_property) << " = \"" << std::get<1>(per_property) << "\";" << '\n';
    config_data.push_back(config_stream.str());
  }
  // Some default components are not captured by the job option service
  // and have to be traversed like this. Note that Gaudi!577 will improve this.
  for (const auto* name : {"ApplicationMgr", "MessageSvc", "NTupleSvc"}) {
    std::stringstream config_stream;
    auto              svc = service<IProperty>(name);
    if (!svc.isValid())
      continue;
    for (const auto* property : svc->getProperties()) {
      config_stream << name << "." << property->name() << " = \"" << property->toString() << "\";" << '\n';
    }
    config_data.push_back(config_stream.str());
  }

  //// finalize trees and file //////////////////////////////
  m_file->cd();

  // The branch only stores the address of the string, so the string has to
  // stay alive until the metadata tree has been filled below. It therefore
  // must not be local to the if-block.
  std::string s_env_key4hep_stack;
  if (const char* env_key4hep_stack = std::getenv("KEY4HEP_STACK")) {
    s_env_key4hep_stack = env_key4hep_stack;
    m_metadatatree->Branch("key4hepStack", &s_env_key4hep_stack);
  }

  m_metadatatree->Branch("gaudiConfigOptions", &config_data);
  m_metadatatree->Branch("CollectionIDs", m_podioLegacyDataSvc->getCollectionIDs());

  m_metadatatree->Fill();

  m_colMDtree->Branch("colMD", "std::map<int,podio::GenericParameters>",
                      m_podioLegacyDataSvc->getProvider().getColMetaDataMap());
  m_colMDtree->Fill();
  m_runMDtree->Branch("runMD", "std::map<int,podio::GenericParameters>",
                      m_podioLegacyDataSvc->getProvider().getRunMetaDataMap());
  m_runMDtree->Fill();

  m_datatree->Write();
  m_file->Write();
  m_file->Close();

  // Every message has to be terminated with endmsg, otherwise it is never
  // emitted; this includes the case without a remote file name.
  if (m_filenameRemote.value().empty()) {
    info() << "Data written to: " << m_filename.value() << endmsg;
    return StatusCode::SUCCESS;
  }

  const bool copied = TFile::Cp(m_filename.value().c_str(), m_filenameRemote.value().c_str(), false);
  if (!copied) {
    error() << "Data written to: " << m_filename.value() << " but copying to: " << m_filenameRemote.value()
            << " failed" << endmsg;
    return StatusCode::FAILURE;
  }
  info() << "Data written to: " << m_filename.value() << " and copied to: " << m_filenameRemote.value() << endmsg;
  return StatusCode::SUCCESS;
}

0 comments on commit 3d69b33

Please sign in to comment.