<br><br><br><br><br>

# uproot: array-based ROOT I/O

<br><br><br><br><br>

In [3]:
# In uproot, files are presented with the same interface as Python dicts, including subdirectories.

import uproot
# Open the example ROOT file straight from its URL (this cell needs network access).
file = uproot.open("http://scikit-hep.org/uproot/examples/nesteddirs.root")
# Last expression of the cell, so the list of keys is what gets displayed as output.
file.keys()

# Exercise: how many objects named "tree" does this file contain?
# NOTE(review): the displayed keys contain no "tree" entry, so the answer presumably
# requires looking inside the listed entries as well — confirm against the uproot docs.

[b'one;1', b'three;1']

In [4]:
# TTrees are also presented like Python dicts.

# Open the example file by URL and pull out the object stored under the "events" key;
# `events` is reused by the later cells in this notebook.
events = uproot.open("http://scikit-hep.org/uproot/examples/Zmumu.root")["events"]
# Displayed as the cell output: the names available in `events` (returned as bytes).
events.keys()

[b'Type',
 b'Run',
 b'Event',
 b'E1',
 b'px1',
 b'py1',
 b'pz1',
 b'pt1',
 b'eta1',
 b'phi1',
 b'Q1',
 b'E2',
 b'px2',
 b'py2',
 b'pz2',
 b'pt2',
 b'eta2',
 b'phi2',
 b'Q2',
 b'M']

<p style="font-size: 2em">Three basic ways to get data:</p>

<table width="100%" style="font-size: 1.25em"><tr>
    <td width="33%" style="vertical-align: top">
        <p style="font-weight: bold; font-size: 1.5em; margin-bottom: 0.5em">Direct</p>
        <p>Read the file and return an array.</p>
        <ul>
            <li style="margin-bottom: 0.3em"><a href="https://uproot.readthedocs.io/en/latest/ttree-handling.html#id11">TBranch.array</a></li>
            <li style="margin-bottom: 0.3em"><a href="https://uproot.readthedocs.io/en/latest/ttree-handling.html#array">TTree.array</a></li>
            <li style="margin-bottom: 0.3em"><a href="https://uproot.readthedocs.io/en/latest/ttree-handling.html#arrays">TTree.arrays</a></li>
        </ul>
    </td><td width="33%" style="vertical-align: top">
        <p style="font-weight: bold; font-size: 1.5em; margin-bottom: 0.5em">Lazy</p>
        <p>Get an object that reads on demand.</p>
        <ul>
            <li style="margin-bottom: 0.3em"><a href="https://uproot.readthedocs.io/en/latest/ttree-handling.html#id13">TBranch.lazyarray</a></li>
            <li style="margin-bottom: 0.3em"><a href="https://uproot.readthedocs.io/en/latest/ttree-handling.html#lazyarray">TTree.lazyarray</a></li>
            <li style="margin-bottom: 0.3em"><a href="https://uproot.readthedocs.io/en/latest/ttree-handling.html#lazyarrays">TTree.lazyarrays</a></li>
            <li style="margin-bottom: 0.3em"><a href="https://uproot.readthedocs.io/en/latest/opening-files.html#uproot-lazyarray-and-lazyarrays">uproot.lazyarray</a></li>
            <li style="margin-bottom: 0.3em"><a href="https://uproot.readthedocs.io/en/latest/opening-files.html#uproot-lazyarray-and-lazyarrays">uproot.lazyarrays</a></li>
        </ul>
    </td><td width="33%" style="vertical-align: top">
        <p style="font-weight: bold; font-size: 1.5em; margin-bottom: 0.5em">Iterative</p>
        <p>Read arrays in batches of entries.</p>
        <ul>
            <li style="margin-bottom: 0.3em"><a href="https://uproot.readthedocs.io/en/latest/ttree-handling.html#iterate">TTree.iterate</a></li>
            <li style="margin-bottom: 0.3em"><a href="https://uproot.readthedocs.io/en/latest/opening-files.html#uproot-iterate">uproot.iterate</a></li>
        </ul>
    </td>
</tr></table>

In [5]:
# Direct: read "E1" from `events` (opened in an earlier cell) and get an array back
# right away; being the last expression, the array is shown as the cell output.

events.array("E1")

array([82.20186639, 62.34492895, 62.34492895, ..., 81.27013558,
       81.27013558, 81.56621735])

In [13]:
# Lazy: get an array-like object for "E1" that is split into chunks of at most 500 entries.

array = events.lazyarray("E1", entrysteps=500)

# Report how many entries each chunk holds.
print(list(map(len, array.chunks)))

# Show the lazy array itself as the cell output.
array

[500, 500, 500, 500, 304]


<ChunkedArray [82.2018663875 62.3449289481 62.3449289481 ... 81.2701355756 81.2701355756 81.5662173543] at 0x7f3aa31752e8>

In [10]:
# Iterative: walk through "E1" in batches of up to 500 entries.

for chunk in events.iterate("E1", entrysteps=500):
    # Each batch is a mapping keyed by the (bytes) name that was requested.
    e1_batch = chunk[b"E1"]
    # Print the batch size followed by its first five values.
    print(len(e1_batch), e1_batch[:5])

500 [82.20186639 62.34492895 62.34492895 60.62187459 41.82638891]
500 [45.7959062  45.7108209  98.03548137 53.53706097 53.53706097]
500 [31.4488924  31.4488924  31.39073377 64.85457285 46.92904281]
500 [88.35872379 52.5560163  52.5560163  52.38402383 75.34015706]
304 [46.0752809  46.0752809  45.93225789 51.01950403 84.91115834]


<p style="font-size: 2em">Advantages and disadvantages of each:</p>

<table width="100%" style="font-size: 1.25em"><tr>
    <td width="33%" style="vertical-align: top">
        <p style="font-weight: bold; font-size: 1.5em; margin-bottom: 0.5em">Direct</p>
        <p>Simple; most libraries will recognize the NumPy array you get back.</p>
    </td><td width="33%" style="vertical-align: top">
        <p style="font-weight: bold; font-size: 1.5em; margin-bottom: 0.5em">Lazy</p>
        <p>Transparently work on data too large to fit into memory.</p>
    </td><td width="33%" style="vertical-align: top">
        <p style="font-weight: bold; font-size: 1.5em; margin-bottom: 0.5em">Iterative</p>
        <p>Control the loading of data into and out of memory.</p>
    </td>
</tr></table>

In [16]:
import numpy

# Exercise: compute numpy.sqrt(E1**2 - px1**2 - py1**2 - pz1**2) in all three modes.

# Direct mode: request "E1" plus the "p[xyz]1" pattern (which supplies px1, py1 and pz1,
# all indexed just below); namedecode="utf-8" makes the keys plain str instead of bytes.
arrays = events.arrays(["E1", "p[xyz]1"], namedecode="utf-8")
E1, px1, py1, pz1 = (arrays[key] for key in ("E1", "px1", "py1", "pz1"))

# Keep the same left-to-right arithmetic as the exercise statement.
radicand = E1**2 - px1**2 - py1**2 - pz1**2
result = numpy.sqrt(radicand)

# Display the computed array as the cell output.
result

array([0.10565837, 0.10565839, 0.10565839, ..., 0.10565838, 0.10565838,
       0.10565833])