From 6fc6c62876326f99ae09944dfb12e33e6111b9e6 Mon Sep 17 00:00:00 2001 From: Christopher Burr Date: Wed, 18 Jul 2018 16:42:36 +0200 Subject: [PATCH 1/3] Fix #63 --- root_pandas/readwrite.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/root_pandas/readwrite.py b/root_pandas/readwrite.py index ddc6dc0..05b063b 100644 --- a/root_pandas/readwrite.py +++ b/root_pandas/readwrite.py @@ -81,6 +81,9 @@ def expand_braces(orig): def get_nonscalar_columns(array): + if len(array) == 0: + return [] + first_row = array[0] bad_cols = np.array([x.ndim != 0 for x in first_row]) col_names = np.array(array.dtype.names) From 637211975dc164e0bc246cefa4148747f803c6ff Mon Sep 17 00:00:00 2001 From: Christopher Burr Date: Wed, 18 Jul 2018 21:19:19 +0200 Subject: [PATCH 2/3] Avoid yielding empty dataframes --- root_pandas/readwrite.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/root_pandas/readwrite.py b/root_pandas/readwrite.py index 05b063b..cb163ec 100644 --- a/root_pandas/readwrite.py +++ b/root_pandas/readwrite.py @@ -249,6 +249,8 @@ def genchunks(): current_index = 0 for chunk in range(int(ceil(float(n_entries) / chunksize))): arr = root2array(paths, key, all_vars, start=chunk * chunksize, stop=(chunk+1) * chunksize, selection=where, *args, **kwargs) + if len(arr) == 0: + continue if flatten: arr = do_flatten(arr, flatten) yield convert_to_dataframe(arr, start_index=current_index) From 389584be778292c8001a1721a8f32e72859a5809 Mon Sep 17 00:00:00 2001 From: Christopher Burr Date: Wed, 18 Jul 2018 21:23:51 +0200 Subject: [PATCH 3/3] Add test for #63 --- tests/test_issues.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 tests/test_issues.py diff --git a/tests/test_issues.py b/tests/test_issues.py new file mode 100644 index 0000000..2e52f59 --- /dev/null +++ b/tests/test_issues.py @@ -0,0 +1,16 @@ +import os + +import pandas as pd +import root_pandas + + +def test_issue_63(): + df = pd.DataFrame({'a': [], 'b': []}) + root_pandas.to_root(df, 'tmp_1.root', 'my_tree') + df = pd.DataFrame({'a': list(range(10)), 'b': list(range(10))}) + root_pandas.to_root(df, 'tmp_2.root', 'my_tree') + result = list(root_pandas.read_root(['tmp_1.root', 'tmp_2.root'], 'my_tree', where='a > 2', chunksize=1)) + assert len(result) == 7 + assert all(len(df) == 1 for df in result) + os.remove('tmp_1.root') + os.remove('tmp_2.root')