This repository has been archived by the owner on Jun 21, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 67
/
__init__.py
161 lines (111 loc) · 6.28 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/usr/bin/env python
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot/blob/master/LICENSE
"""uproot -- ROOT I/O in pure Python and Numpy.
Basic cheat-sheet
-----------------
Open ROOT files with uproot.open (for reading) or uproot.create (for read-write).
file = uproot.open("/path/to/my/file.root")
file = uproot.open("root://path/to/my/file.root")
file = uproot.open("http://path/to/my/file.root")
writeable = uproot.create("/new/local/file.root")
These file objects act like dicts; get objects like TTrees from them with square brackets.
tree = file["path/to/events"]
tree = file["path/to/events;2"] # optional cycle number
writeable["name"] = numpy.histogram(...) # write to files by assignment (histograms only)
TTree objects also act like dicts; get branches with square brackets or list them with keys().
tree.keys()
tree.allkeys() # recursive branches-of-branches
tree.show() # display view
tree["mybranch"] # searches recursively
Get data as arrays with an array(...) or arrays(...) call.
tree["mybranch"].array()
tree.array("mybranch")
tree.arrays(["branch1", "branch2", "branch3"])
tree.arrays(["Muon_*"])
Variable numbers of objects per entry (particles per event) are handled by awkward-array:
https://github.com/scikit-hep/awkward-array
The arrays(...) call returns a dict from branch name (bytes) to data (Numpy array) by default.
Change this by passing an outputtype class (e.g. dict, tuple, pandas.DataFrame).
x, y, z = tree.arrays(["x", "y", "z"], outputtype=tuple)
For more idiomatic Pandas defaults, use tree.pandas.df().
df = tree.pandas.df()
If the desired branches do not fit into memory, iterate over chunks of entries with iterate().
The interface is the same as above: you get the same dict/tuple/DataFrame with fewer entries.
for x, y, z in tree.iterate(["x", "y", "z"], outputtype=tuple):
do_something(x, y, z)
To iterate over many files (like TChain), do uproot.iterate(...).
for arrays in uproot.iterate("files*.root", "path/to/events", ["Muon_*"]):
do_something(arrays)
Intermediate cheat-sheet
------------------------
Each call to array/arrays/iterate reads the file again. For faster access after the first time,
pass a dict-like object to the cache parameter and uproot will try the cache first.
cache = {}
arrays = tree.arrays(["Muon_*"], cache=cache) # slow
arrays = tree.arrays(["Muon_*"], cache=cache) # fast
You control the cache object. If you're running out of memory, remove it or remove items from it.
Or use one of the dict-like caches from cachetools (already installed) or another library.
For parallel processing, pass a Python 3 executor.
import concurrent.futures
executor = concurrent.futures.ThreadPoolExecutor(32)
arrays = tree.arrays(["Muon_*"], executor=executor)
To get the number of entries per file in a collection of files, use uproot.numentries().
uproot.numentries("tests/samples/sample*.root", "sample", total=False)
For arrays that read on demand, use uproot.lazyarray and uproot.lazyarrays.
For processing with Dask, use uproot.daskarray or uproot.daskframe.
Advanced cheat-sheet
--------------------
The standard bytes-to-arrays decoding is attached to each branch as
tree["mybranch"].interpretation
This can be overridden by passing a new interpretation to array/arrays/iterate.
Most reinterpretations will produce wrong values (it's a reinterpret_cast<...>).
Some, however, are useful:
mybranch = tree["mybranch"]
fill_me_instead = numpy.empty(big_enough)
mybranch.array(mybranch.interpretation.toarray(fill_me_instead))
fill_me_instead # filled in place
mybranch.array(uproot.asdebug) # view raw bytes of each entry
By default, local files are read as memory-mapped arrays. Change this by setting
open("...", localsource=lambda path: uproot.FileSource(path, **uproot.FileSource.defaults))
The same procedure sets options for uproot.XRootDSource and uproot.HTTPSource.
"""
from __future__ import absolute_import
# high-level entry points
from uproot.rootio import open, xrootd, http
from uproot.tree import iterate, numentries, lazyarray, lazyarrays, daskarray, daskframe
from uproot.write.TFile import TFileCreate as create
from uproot.write.TFile import TFileRecreate as recreate
from uproot.write.TFile import TFileUpdate as update
from uproot.write.compress import ZLIB, LZMA, LZ4
from uproot.write.objects.TTree import newtree, newbranch
from uproot.source.memmap import MemmapSource
from uproot.source.file import FileSource
from uproot.source.xrootd import XRootDSource
from uproot.source.http import HTTPSource
from uproot.cache import ArrayCache, ThreadSafeArrayCache
from uproot.interp.auto import interpret
from uproot.interp.numerical import asdtype
from uproot.interp.numerical import asarray
from uproot.interp.numerical import asdouble32
from uproot.interp.numerical import asstlbitset
from uproot.interp.jagged import asjagged
from uproot.interp.objects import astable
from uproot.interp.objects import asobj
from uproot.interp.objects import asgenobj
from uproot.interp.objects import asstring
from uproot.interp.objects import SimpleArray
from uproot.interp.objects import STLVector
from uproot.interp.objects import STLMap
from uproot.interp.objects import STLString
from uproot.interp.objects import Pointer
# Ready-made interpretation for inspecting a branch: the module docstring's
# example `mybranch.array(uproot.asdebug)` uses it to view the raw bytes of
# each entry. ("u1" is presumably the unsigned 1-byte dtype code -- confirm
# against asdtype.)
asdebug = asjagged(asdtype("u1"))
from uproot import pandas
# put help strings on everything (they're long, too disruptive to intersperse
# in the code, and are built programmatically to avoid duplication; Python's
# inline docstring method doesn't accept non-literals)
import uproot._help
# convenient access to the version number
from uproot.version import __version__
# don't expose uproot.uproot; it's ugly
# (`import uproot._help` above binds the top-level name `uproot` in this
# module's namespace as a side effect; that binding is what is removed here,
# so this statement must stay after that import)
del uproot
# Public API exported by `from uproot import *`.
#
# Every entry must be a name that is actually bound in this module: a
# star-import raises AttributeError on the first listed name it cannot find.
# "ZSTD" is deliberately not listed, because only ZLIB, LZMA and LZ4 are
# imported from uproot.write.compress in this file; add it back here only
# together with a matching import.
__all__ = [
    # from uproot.rootio and uproot.tree
    "open", "xrootd", "http",
    "iterate", "numentries", "lazyarray", "lazyarrays", "daskarray", "daskframe",
    # from uproot.write.*
    "create", "recreate", "update",
    "ZLIB", "LZMA", "LZ4",
    "newtree", "newbranch",
    # from uproot.source.*
    "MemmapSource", "FileSource", "XRootDSource", "HTTPSource",
    # from uproot.cache
    "ArrayCache", "ThreadSafeArrayCache",
    # from uproot.interp.* (plus the asdebug interpretation built in this module)
    "interpret",
    "asdtype", "asarray", "asdouble32", "asstlbitset",
    "asjagged",
    "astable", "asobj", "asgenobj", "asstring",
    "asdebug",
    "SimpleArray", "STLVector", "STLMap", "STLString", "Pointer",
    # submodule and version string
    "pandas",
    "__version__",
]