Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 32 additions & 15 deletions bindings/pyroot/pythonizations/test/rdataframe_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,14 @@
import platform
import unittest

import numpy
import ROOT


class DatasetContext:
"""A helper class to create the dataset for the tutorial below."""

filenames = [
"rdataframe_misc_1.root",
"rdataframe_misc_2.root",
"rdataframe_misc_3.root"
]
filenames = ["rdataframe_misc_1.root", "rdataframe_misc_2.root", "rdataframe_misc_3.root"]
treename = "dataset"
nentries = 5

Expand Down Expand Up @@ -46,6 +43,7 @@ def __exit__(self, *_):
for filename in self.filenames:
os.remove(filename)


class RDataFrameMisc(unittest.TestCase):
"""Miscellaneous RDataFrame tests"""

Expand All @@ -57,10 +55,7 @@ def test_empty_filenames(self):
# https://bugs.llvm.org/show_bug.cgi?id=49692 :
# llvm JIT fails to catch exceptions on MacOS ARM, so we disable their testing
# Also fails on Windows for the same reason
if (
(platform.processor() != "arm" or platform.mac_ver()[0] == '') and not
platform.system() == "Windows"
):
if (platform.processor() != "arm" or platform.mac_ver()[0] == "") and not platform.system() == "Windows":
# With implicit conversions, cppyy also needs to try dispatching to the various
# constructor overloads. The C++ exception will be thrown, but will be incapsulated
# in a more generic TypeError telling the user that none of the overloads worked
Expand All @@ -81,7 +76,7 @@ def _get_rdf(self, dataset):
chain.Add(filename)

return ROOT.RDataFrame(chain)

def _get_chain(self, dataset):
chain = ROOT.TChain(dataset.treename)
for filename in dataset.filenames:
Expand All @@ -93,9 +88,8 @@ def _define_col(self, rdf):

def _filter_x(self, rdf):
return rdf.Filter("x > 2")

def _test_rdf_in_function(self, chain):

def _test_rdf_in_function(self, chain):
rdf = ROOT.RDataFrame(chain)
meanx = rdf.Mean("x")
meany = rdf.Mean("y")
Expand All @@ -115,8 +109,8 @@ def test_ttree_ownership(self):
rdf = self._get_rdf(dataset)

npy_dict = rdf.AsNumpy()
self.assertIsNone(numpy.testing.assert_array_equal(npy_dict["x"], numpy.array([1,2,3,4,5]*3)))
self.assertIsNone(numpy.testing.assert_array_equal(npy_dict["y"], numpy.array([2,4,6,8,10]*3)))
self.assertIsNone(numpy.testing.assert_array_equal(npy_dict["x"], numpy.array([1, 2, 3, 4, 5] * 3)))
self.assertIsNone(numpy.testing.assert_array_equal(npy_dict["y"], numpy.array([2, 4, 6, 8, 10] * 3)))

chain = self._get_chain(dataset)

Expand All @@ -129,8 +123,31 @@ def test_ttree_ownership(self):

self.assertEqual(rdf.Count().GetValue(), 9)

def test_regression_gh_20291(self):
"""
Regression test for https://github.com/root-project/root/issues/20291
"""
# Issues on Windows with contention on file deletion
if platform.system() == "Windows":
return
out_path = "dataframe_misc_regression_gh20291.root"
try:
x, y = numpy.array([1, 2, 3]), numpy.array([4, 5, 6])
df = ROOT.RDF.FromNumpy({"x": x, "y": y})

df.Snapshot("tree", out_path)

df_out = ROOT.RDataFrame("tree", out_path)
count = df_out.Count()
take_x = df_out.Take["Long64_t"]("x")
take_y = df_out.Take["Long64_t"]("y")

self.assertEqual(count.GetValue(), 3)
self.assertSequenceEqual(take_x.GetValue(), [1, 2, 3])
self.assertSequenceEqual(take_y.GetValue(), [4, 5, 6])
finally:
os.remove(out_path)


if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()
71 changes: 39 additions & 32 deletions tree/dataframe/inc/ROOT/RVecDS.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@ namespace Internal {

namespace RDF {

class R__CLING_PTRCHECK(off) RVecDSColumnReader final : public ROOT::Detail::RDF::RColumnReaderBase {
TPointerHolder *fPtrHolder;
void *GetImpl(Long64_t) final { return fPtrHolder->GetPointer(); }

public:
RVecDSColumnReader(TPointerHolder *ptrHolder) : fPtrHolder(ptrHolder) {}
};

////////////////////////////////////////////////////////////////////////////////////////////////
/// \brief A RDataSource implementation which takes a collection of RVecs, which
/// are able to adopt data from Numpy arrays
Expand All @@ -46,46 +54,18 @@ class RVecDS final : public ROOT::RDF::RDataSource {
using PointerHolderPtrs_t = std::vector<ROOT::Internal::RDF::TPointerHolder *>;

std::tuple<ROOT::RVec<ColumnTypes>...> fColumns;
const std::vector<std::string> fColNames;
const std::map<std::string, std::string> fColTypesMap;
std::vector<std::string> fColNames;
std::unordered_map<std::string, std::string> fColTypesMap;
// The role of the fPointerHoldersModels is to be initialised with the pack
// of arguments in the constrcutor signature at construction time
// Once the number of slots is known, the fPointerHolders are initialised
// according to the models.
const PointerHolderPtrs_t fPointerHoldersModels;
PointerHolderPtrs_t fPointerHoldersModels;
std::vector<PointerHolderPtrs_t> fPointerHolders;
std::vector<std::pair<ULong64_t, ULong64_t>> fEntryRanges{};
std::function<void()> fDeleteRVecs;

Record_t GetColumnReadersImpl(std::string_view colName, const std::type_info &id)
{
auto colNameStr = std::string(colName);
// This could be optimised and done statically
const auto idName = ROOT::Internal::RDF::TypeID2TypeName(id);
auto it = fColTypesMap.find(colNameStr);
if (fColTypesMap.end() == it) {
std::string err = "The specified column name, \"" + colNameStr + "\" is not known to the data source.";
throw std::runtime_error(err);
}

const auto colIdName = it->second;
if (colIdName != idName) {
std::string err = "Column " + colNameStr + " has type " + colIdName +
" while the id specified is associated to type " + idName;
throw std::runtime_error(err);
}

const auto colBegin = fColNames.begin();
const auto colEnd = fColNames.end();
const auto namesIt = std::find(colBegin, colEnd, colName);
const auto index = std::distance(colBegin, namesIt);

Record_t ret(fNSlots);
for (auto slot : ROOT::TSeqU(fNSlots)) {
ret[slot] = fPointerHolders[index][slot]->GetPointerAddr();
}
return ret;
}
Record_t GetColumnReadersImpl(std::string_view, const std::type_info &) { return {}; }

size_t GetEntriesNumber() { return std::get<0>(fColumns).size(); }
template <std::size_t... S>
Expand Down Expand Up @@ -146,6 +126,33 @@ public:
fDeleteRVecs();
}

std::unique_ptr<ROOT::Detail::RDF::RColumnReaderBase>
GetColumnReaders(unsigned int slot, std::string_view colName, const std::type_info &id) final
{
auto colNameStr = std::string(colName);

auto it = fColTypesMap.find(colNameStr);
if (fColTypesMap.end() == it) {
std::string err = "The specified column name, \"" + colNameStr + "\" is not known to the data source.";
throw std::runtime_error(err);
}

const auto &colIdName = it->second;
const auto idName = ROOT::Internal::RDF::TypeID2TypeName(id);
if (colIdName != idName) {
std::string err = "Column " + colNameStr + " has type " + colIdName +
" while the id specified is associated to type " + idName;
throw std::runtime_error(err);
}

if (auto colNameIt = std::find(fColNames.begin(), fColNames.end(), colNameStr); colNameIt != fColNames.end()) {
const auto index = std::distance(fColNames.begin(), colNameIt);
return std::make_unique<ROOT::Internal::RDF::RVecDSColumnReader>(fPointerHolders[index][slot]);
}

throw std::runtime_error("Could not find column name \"" + colNameStr + "\" in available column names.");
}

const std::vector<std::string> &GetColumnNames() const { return fColNames; }

std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges()
Expand Down
Loading