Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unable to open a specific tree: basket 1 in tree/branch xxx has the wrong number of bytes (yyy) for interpretation #1170

Open
MatteoBattisti opened this issue Mar 14, 2024 · 1 comment
Labels
bug (unverified) The problem described would be a bug, but needs to be triaged

Comments

@MatteoBattisti
Copy link

Good morning,
I encountered a problem when I tried to read a specific tree from this ROOT file.
https://drive.google.com/file/d/13KhywjFnUluwCHcSQgo4c9dDAT-jZmII/view?usp=sharing

In particular, I am interested in the data contained in the ':tevent_2nd_integral:/positionISS_v2' branch. When I try to read it with the code below:

root_files = '*.root'
for Event in uproot.iterate(root_files+':tevent_2nd_integral:/positionISS_v2', library='np'):
     print(Event['positionISS']['lat'])

I get the following.

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/interpretation/numerical.py:359, in AsDtype.basket_array(self, data, byte_offsets, basket, branch, context, cursor_offset, library, options)
    358 try:
--> 359     output = data.view(dtype).reshape((-1, *shape))
    360 except ValueError as err:

ValueError: When changing to a larger dtype, its size must be a divisor of the total size in bytes of the last axis of the array.

The above exception was the direct cause of the following exception:

ValueError                                Traceback (most recent call last)
Cell In[5], line 1
----> 1 for Event in uproot.iterate(root_files+':tevent_2nd_integral:/positionISS_v2', library='np'):
      2     print(Event['positionISS']['lat'])

File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/behaviors/TBranch.py:192, in iterate(files, expressions, cut, filter_name, filter_typename, filter_branch, aliases, language, step_size, decompression_executor, interpretation_executor, library, ak_add_doc, how, report, custom_classes, allow_missing, **options)
    190 with hasbranches:
    191     try:
--> 192         for item in hasbranches.iterate(
    193             expressions=expressions,
    194             cut=cut,
    195             filter_name=filter_name,
    196             filter_typename=filter_typename,
    197             filter_branch=filter_branch,
    198             aliases=aliases,
    199             language=language,
    200             step_size=step_size,
    201             decompression_executor=decompression_executor,
    202             interpretation_executor=interpretation_executor,
    203             library=library,
    204             ak_add_doc=ak_add_doc,
    205             how=how,
    206             report=report,
    207         ):
    208             if report:
    209                 arrays, report = item

File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/behaviors/TBranch.py:996, in HasBranches.iterate(self, expressions, cut, filter_name, filter_typename, filter_branch, aliases, language, entry_start, entry_stop, step_size, decompression_executor, interpretation_executor, library, ak_add_doc, how, report)
    994 if isinstance(self, TBranch) and expressions is None and len(keys) == 0:
    995     filter_branch = uproot._util.regularize_filter(filter_branch)
--> 996     yield from self.parent.iterate(
    997         expressions=expressions,
    998         cut=cut,
    999         filter_name=filter_name,
   1000         filter_typename=filter_typename,
   1001         filter_branch=lambda branch: branch is self and filter_branch(branch),
   1002         aliases=aliases,
   1003         language=language,
   1004         entry_start=entry_start,
   1005         entry_stop=entry_stop,
   1006         step_size=step_size,
   1007         decompression_executor=decompression_executor,
   1008         interpretation_executor=interpretation_executor,
   1009         library=library,
   1010         how=how,
   1011         report=report,
   1012     )
   1014 else:
   1015     entry_start, entry_stop = _regularize_entries_start_stop(
   1016         self.tree.num_entries, entry_start, entry_stop
   1017     )

File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/behaviors/TBranch.py:1077, in HasBranches.iterate(self, expressions, cut, filter_name, filter_typename, filter_branch, aliases, language, entry_start, entry_stop, step_size, decompression_executor, interpretation_executor, library, ak_add_doc, how, report)
   1075 arrays = {}
   1076 interp_options = {"ak_add_doc": ak_add_doc}
-> 1077 _ranges_or_baskets_to_arrays(
   1078     self,
   1079     ranges_or_baskets,
   1080     branchid_interpretation,
   1081     sub_entry_start,
   1082     sub_entry_stop,
   1083     decompression_executor,
   1084     interpretation_executor,
   1085     library,
   1086     arrays,
   1087     True,
   1088     interp_options,
   1089 )
   1091 _fix_asgrouped(
   1092     arrays,
   1093     expression_context,
   (...)
   1097     ak_add_doc,
   1098 )
   1100 output = language.compute_expressions(
   1101     self,
   1102     arrays,
   (...)
   1107     self.object_path,
   1108 )

File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/behaviors/TBranch.py:3143, in _ranges_or_baskets_to_arrays(hasbranches, ranges_or_baskets, branchid_interpretation, entry_start, entry_stop, decompression_executor, interpretation_executor, library, arrays, update_ranges_or_baskets, interp_options)
   3140     pass
   3142 elif isinstance(obj, tuple) and len(obj) == 3:
-> 3143     uproot.source.futures.delayed_raise(*obj)
   3145 else:
   3146     raise AssertionError(obj)

File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/source/futures.py:38, in delayed_raise(exception_class, exception_value, traceback)
     34 def delayed_raise(exception_class, exception_value, traceback):
     35     """
     36     Raise an exception from a background thread on the main thread.
     37     """
---> 38     raise exception_value.with_traceback(traceback)

File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/behaviors/TBranch.py:3085, in _ranges_or_baskets_to_arrays.<locals>.basket_to_array(basket)
   3082 context = dict(branch.context)
   3083 context["forth"] = forth_context[branch.cache_key]
-> 3085 basket_arrays[basket.basket_num] = interpretation.basket_array(
   3086     basket.data,
   3087     basket.byte_offsets,
   3088     basket,
   3089     branch,
   3090     context,
   3091     basket.member("fKeylen"),
   3092     library,
   3093     interp_options,
   3094 )
   3095 if basket.num_entries != len(basket_arrays[basket.basket_num]):
   3096     raise ValueError(
   3097         """basket {} in tree/branch {} has the wrong number of entries """
   3098         """(expected {}, obtained {}) when interpreted as {}
   (...)
   3106         )
   3107     )

File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/interpretation/numerical.py:361, in AsDtype.basket_array(self, data, byte_offsets, basket, branch, context, cursor_offset, library, options)
    359             output = data.view(dtype).reshape((-1, *shape))
    360         except ValueError as err:
--> 361             raise ValueError(
    362                 """basket {} in tree/branch {} has the wrong number of bytes ({}) """
    363                 """for interpretation {}
    364 in file {}""".format(
    365                     basket.basket_num,
    366                     branch.object_path,
    367                     len(data),
    368                     self,
    369                     branch.file.file_path,
    370                 )
    371             ) from err
    373         self.hook_after_basket_array(
    374             data=data,
    375             byte_offsets=byte_offsets,
   (...)
    382             options=options,
    383         )
    385         return output

ValueError: basket 1 in tree/branch /tevent_2nd_integral;1:positionISS_v2 has the wrong number of bytes (30102) for interpretation AsDtype("[('timeunix', '>f8'), ('timeunix_gps_low', '>f8'), ('timeunix_gps_high', '>f8'), ('sidereal_gast', '>f8'), ('sidereal_gmst', '>f8'), ('x', '>f8'), ('y', '>f8'), ('z', '>f8'), ('ISS_from_Geocenter_dist', '>f8'), ('vx', '>f8'), ('vy', '>f8'), ('vz', '>f8'), ('v', '>f8'), ('ra', '>f8'), ('dec', '>f8'), ('x_atTime', '>f8'), ('y_atTime', '>f8'), ('z_atTime', '>f8'), ('vx_atTime', '>f8'), ('vy_atTime', '>f8'), ('vz_atTime', '>f8'), ('ra_atTime', '>f8'), ('dec_atTime', '>f8'), ('x_geo', '>f8'), ('y_geo', '>f8'), ('z_geo', '>f8'), ('vx_geo', '>f8'), ('vy_geo', '>f8'), ('vz_geo', '>f8'), ('lon', '>f8'), ('lat', '>f8'), ('alt_km', '>f8'), ('Sun_from_Geocenter_x', '>f8'), ('Sun_from_Geocenter_y', '>f8'), ('Sun_from_Geocenter_z', '>f8'), ('Geocenter_from_Sun_vx', '>f8'), ('Geocenter_from_Sun_vy', '>f8'), ('Geocenter_from_Sun_vz', '>f8'), ('Moon_from_Geocenter_x', '>f8'), ('Moon_from_Geocenter_y', '>f8'), ('Moon_from_Geocenter_z', '>f8'), ('Moon_from_Geocenter_vx', '>f8'), ('Moon_from_Geocenter_vy', '>f8'), ('Moon_from_Geocenter_vz', '>f8'), ('beta', '>f8'), ('heading', '>f8'), ('roll', '>f8'), ('pitch', '>f8'), ('yaw', '>f8'), ('x_geo_roll_ref', '>f8'), ('y_geo_roll_ref', '>f8'), ('z_geo_roll_ref', '>f8'), ('x_geo_pitch_ref', '>f8'), ('y_geo_pitch_ref', '>f8'), ('z_geo_pitch_ref', '>f8'), ('x_geo_yaw_ref', '>f8'), ('y_geo_yaw_ref', '>f8'), ('z_geo_yaw_ref', '>f8'), ('x_rowdir', '>f8'), ('y_rowdir', '>f8'), ('z_rowdir', '>f8'), ('x_coldir', '>f8'), ('y_coldir', '>f8'), ('z_coldir', '>f8'), ('x_optdir', '>f8'), ('y_optdir', '>f8'), ('z_optdir', '>f8'), ('x_geo_rowdir', '>f8'), ('y_geo_rowdir', '>f8'), ('z_geo_rowdir', '>f8'), ('x_geo_coldir', '>f8'), ('y_geo_coldir', '>f8'), ('z_geo_coldir', '>f8'), ('x_geo_optdir', '>f8'), ('y_geo_optdir', '>f8'), ('z_geo_optdir', '>f8'), ('lon_optsurf', '>f8'), ('lat_optsurf', '>f8'), ('x_geo_optsurf', 
'>f8'), ('y_geo_optsurf', '>f8'), ('z_geo_optsurf', '>f8'), ('pmt_geo_surf', '>f8', (3,)), ('icrfToAtTime_a', '>f8'), ('icrfToAtTime_b', '>f8'), ('icrfToAtTime_c', '>f8'), ('atTimeToIcrf_a', '>f8'), ('atTimeToIcrf_b', '>f8'), ('atTimeToIcrf_c', '>f8'), ('valid', 'u1')]")
in file example.root

I am quite sure that the problem is in the data itself, since if I try to read the similar branch ':tevent_2nd_integral:/positionISS', I do not get any error (the code and its output are shown below):

for Event in uproot.iterate(root_files+':tevent_2nd_integral:/positionISS', library='np'):
     print(Event['positionISS']['lat'])

[40.77616887 40.77765956 40.77915019 ... 41.15998649 41.1614595  41.16293244]]

The same problematic branch can, however, be read using ROOT or PyROOT, so I was wondering if there is a way to solve the problem in uproot (I am using version 5.3.1; earlier I also tried with version 3).
I think that the problem is the presence of some 'junk' bytes that prevent uproot from reading it correctly, but I do not know how to solve the problem.

Thanks!

@MatteoBattisti MatteoBattisti added the bug (unverified) The problem described would be a bug, but needs to be triaged label Mar 14, 2024
@MatteoBattisti
Copy link
Author

There was an error in the first code snippet I posted; it should have been:

root_files = '*.root'
for Event in uproot.iterate(root_files+':tevent_2nd_integral:/positionISS_v2', library='np'):
     print(Event['positionISS_v2']['lat'])

but the error output is basically the same

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/interpretation/numerical.py:359, in AsDtype.basket_array(self, data, byte_offsets, basket, branch, context, cursor_offset, library, options)
    358 try:
--> 359     output = data.view(dtype).reshape((-1, *shape))
    360 except ValueError as err:

ValueError: When changing to a larger dtype, its size must be a divisor of the total size in bytes of the last axis of the array.

The above exception was the direct cause of the following exception:

ValueError                                Traceback (most recent call last)
Cell In[8], line 2
      1 root_files = '*.root'
----> 2 for Event in uproot.iterate(root_files+':tevent_2nd_integral:/positionISS_v2', library='np'):
      3      print(Event['positionISS_v2']['lat'])

File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/behaviors/TBranch.py:192, in iterate(files, expressions, cut, filter_name, filter_typename, filter_branch, aliases, language, step_size, decompression_executor, interpretation_executor, library, ak_add_doc, how, report, custom_classes, allow_missing, **options)
    190 with hasbranches:
    191     try:
--> 192         for item in hasbranches.iterate(
    193             expressions=expressions,
    194             cut=cut,
    195             filter_name=filter_name,
    196             filter_typename=filter_typename,
    197             filter_branch=filter_branch,
    198             aliases=aliases,
    199             language=language,
    200             step_size=step_size,
    201             decompression_executor=decompression_executor,
    202             interpretation_executor=interpretation_executor,
    203             library=library,
    204             ak_add_doc=ak_add_doc,
    205             how=how,
    206             report=report,
    207         ):
    208             if report:
    209                 arrays, report = item

File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/behaviors/TBranch.py:996, in HasBranches.iterate(self, expressions, cut, filter_name, filter_typename, filter_branch, aliases, language, entry_start, entry_stop, step_size, decompression_executor, interpretation_executor, library, ak_add_doc, how, report)
    994 if isinstance(self, TBranch) and expressions is None and len(keys) == 0:
    995     filter_branch = uproot._util.regularize_filter(filter_branch)
--> 996     yield from self.parent.iterate(
    997         expressions=expressions,
    998         cut=cut,
    999         filter_name=filter_name,
   1000         filter_typename=filter_typename,
   1001         filter_branch=lambda branch: branch is self and filter_branch(branch),
   1002         aliases=aliases,
   1003         language=language,
   1004         entry_start=entry_start,
   1005         entry_stop=entry_stop,
   1006         step_size=step_size,
   1007         decompression_executor=decompression_executor,
   1008         interpretation_executor=interpretation_executor,
   1009         library=library,
   1010         how=how,
   1011         report=report,
   1012     )
   1014 else:
   1015     entry_start, entry_stop = _regularize_entries_start_stop(
   1016         self.tree.num_entries, entry_start, entry_stop
   1017     )

File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/behaviors/TBranch.py:1077, in HasBranches.iterate(self, expressions, cut, filter_name, filter_typename, filter_branch, aliases, language, entry_start, entry_stop, step_size, decompression_executor, interpretation_executor, library, ak_add_doc, how, report)
   1075 arrays = {}
   1076 interp_options = {"ak_add_doc": ak_add_doc}
-> 1077 _ranges_or_baskets_to_arrays(
   1078     self,
   1079     ranges_or_baskets,
   1080     branchid_interpretation,
   1081     sub_entry_start,
   1082     sub_entry_stop,
   1083     decompression_executor,
   1084     interpretation_executor,
   1085     library,
   1086     arrays,
   1087     True,
   1088     interp_options,
   1089 )
   1091 _fix_asgrouped(
   1092     arrays,
   1093     expression_context,
   (...)
   1097     ak_add_doc,
   1098 )
   1100 output = language.compute_expressions(
   1101     self,
   1102     arrays,
   (...)
   1107     self.object_path,
   1108 )

File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/behaviors/TBranch.py:3143, in _ranges_or_baskets_to_arrays(hasbranches, ranges_or_baskets, branchid_interpretation, entry_start, entry_stop, decompression_executor, interpretation_executor, library, arrays, update_ranges_or_baskets, interp_options)
   3140     pass
   3142 elif isinstance(obj, tuple) and len(obj) == 3:
-> 3143     uproot.source.futures.delayed_raise(*obj)
   3145 else:
   3146     raise AssertionError(obj)

File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/source/futures.py:38, in delayed_raise(exception_class, exception_value, traceback)
     34 def delayed_raise(exception_class, exception_value, traceback):
     35     """
     36     Raise an exception from a background thread on the main thread.
     37     """
---> 38     raise exception_value.with_traceback(traceback)

File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/behaviors/TBranch.py:3085, in _ranges_or_baskets_to_arrays.<locals>.basket_to_array(basket)
   3082 context = dict(branch.context)
   3083 context["forth"] = forth_context[branch.cache_key]
-> 3085 basket_arrays[basket.basket_num] = interpretation.basket_array(
   3086     basket.data,
   3087     basket.byte_offsets,
   3088     basket,
   3089     branch,
   3090     context,
   3091     basket.member("fKeylen"),
   3092     library,
   3093     interp_options,
   3094 )
   3095 if basket.num_entries != len(basket_arrays[basket.basket_num]):
   3096     raise ValueError(
   3097         """basket {} in tree/branch {} has the wrong number of entries """
   3098         """(expected {}, obtained {}) when interpreted as {}
   (...)
   3106         )
   3107     )

File ~/miniconda3/envs/etos_env/lib/python3.9/site-packages/uproot/interpretation/numerical.py:361, in AsDtype.basket_array(self, data, byte_offsets, basket, branch, context, cursor_offset, library, options)
    359             output = data.view(dtype).reshape((-1, *shape))
    360         except ValueError as err:
--> 361             raise ValueError(
    362                 """basket {} in tree/branch {} has the wrong number of bytes ({}) """
    363                 """for interpretation {}
    364 in file {}""".format(
    365                     basket.basket_num,
    366                     branch.object_path,
    367                     len(data),
    368                     self,
    369                     branch.file.file_path,
    370                 )
    371             ) from err
    373         self.hook_after_basket_array(
    374             data=data,
    375             byte_offsets=byte_offsets,
   (...)
    382             options=options,
    383         )
    385         return output

ValueError: basket 4 in tree/branch /tevent_2nd_integral;1:positionISS_v2 has the wrong number of bytes (30102) for interpretation AsDtype("[('timeunix', '>f8'), ('timeunix_gps_low', '>f8'), ('timeunix_gps_high', '>f8'), ('sidereal_gast', '>f8'), ('sidereal_gmst', '>f8'), ('x', '>f8'), ('y', '>f8'), ('z', '>f8'), ('ISS_from_Geocenter_dist', '>f8'), ('vx', '>f8'), ('vy', '>f8'), ('vz', '>f8'), ('v', '>f8'), ('ra', '>f8'), ('dec', '>f8'), ('x_atTime', '>f8'), ('y_atTime', '>f8'), ('z_atTime', '>f8'), ('vx_atTime', '>f8'), ('vy_atTime', '>f8'), ('vz_atTime', '>f8'), ('ra_atTime', '>f8'), ('dec_atTime', '>f8'), ('x_geo', '>f8'), ('y_geo', '>f8'), ('z_geo', '>f8'), ('vx_geo', '>f8'), ('vy_geo', '>f8'), ('vz_geo', '>f8'), ('lon', '>f8'), ('lat', '>f8'), ('alt_km', '>f8'), ('Sun_from_Geocenter_x', '>f8'), ('Sun_from_Geocenter_y', '>f8'), ('Sun_from_Geocenter_z', '>f8'), ('Geocenter_from_Sun_vx', '>f8'), ('Geocenter_from_Sun_vy', '>f8'), ('Geocenter_from_Sun_vz', '>f8'), ('Moon_from_Geocenter_x', '>f8'), ('Moon_from_Geocenter_y', '>f8'), ('Moon_from_Geocenter_z', '>f8'), ('Moon_from_Geocenter_vx', '>f8'), ('Moon_from_Geocenter_vy', '>f8'), ('Moon_from_Geocenter_vz', '>f8'), ('beta', '>f8'), ('heading', '>f8'), ('roll', '>f8'), ('pitch', '>f8'), ('yaw', '>f8'), ('x_geo_roll_ref', '>f8'), ('y_geo_roll_ref', '>f8'), ('z_geo_roll_ref', '>f8'), ('x_geo_pitch_ref', '>f8'), ('y_geo_pitch_ref', '>f8'), ('z_geo_pitch_ref', '>f8'), ('x_geo_yaw_ref', '>f8'), ('y_geo_yaw_ref', '>f8'), ('z_geo_yaw_ref', '>f8'), ('x_rowdir', '>f8'), ('y_rowdir', '>f8'), ('z_rowdir', '>f8'), ('x_coldir', '>f8'), ('y_coldir', '>f8'), ('z_coldir', '>f8'), ('x_optdir', '>f8'), ('y_optdir', '>f8'), ('z_optdir', '>f8'), ('x_geo_rowdir', '>f8'), ('y_geo_rowdir', '>f8'), ('z_geo_rowdir', '>f8'), ('x_geo_coldir', '>f8'), ('y_geo_coldir', '>f8'), ('z_geo_coldir', '>f8'), ('x_geo_optdir', '>f8'), ('y_geo_optdir', '>f8'), ('z_geo_optdir', '>f8'), ('lon_optsurf', '>f8'), ('lat_optsurf', '>f8'), ('x_geo_optsurf', 
'>f8'), ('y_geo_optsurf', '>f8'), ('z_geo_optsurf', '>f8'), ('pmt_geo_surf', '>f8', (3,)), ('icrfToAtTime_a', '>f8'), ('icrfToAtTime_b', '>f8'), ('icrfToAtTime_c', '>f8'), ('atTimeToIcrf_a', '>f8'), ('atTimeToIcrf_b', '>f8'), ('atTimeToIcrf_c', '>f8'), ('valid', 'u1')]")
in file example.root

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug (unverified) The problem described would be a bug, but needs to be triaged
Projects
Status: Deserialization
Development

No branches or pull requests

1 participant