### Set autoreloading
This extension automatically reloads imported packages whenever their source changes, so edits take effect without restarting the kernel.

In [None]:
%load_ext autoreload
%autoreload 2

### Import packages

In [None]:
import os
import tqdm
import numpy as np
import h5py
import pynuml
import nugraph as ng

### Configure arguments
Path to the input event file, and the number of events to write to the summary file.

In [None]:
# Path to the input event file. "$NUGRAPH_DATA" is expanded from the environment;
# os.path.expandvars leaves the variable text in place if it is not set.
# NOTE: the summary-file cell derives its output name by stripping the last
# 7 characters of this path, so it is expected to end in ".evt.h5".
FILE = os.path.expandvars("$NUGRAPH_DATA/uboone-opendata/uboone-opendata-e5fac1ac.evt.h5")
# Number of graph-producing events to collect and write to the summary file.
NUM_EVTS = 20

### Get list of indices
Loop over events in the file and build a mapping from event index to event ID for the events that produce valid graphs, stopping once `NUM_EVTS` such events have been found.

In [None]:
# Map the position of each usable event in the file to its event ID.
ids = {}
f = pynuml.io.File(FILE)
processor = pynuml.process.HitGraphProducer(file=f)
for index, event in enumerate(f):
    # Falsy events are skipped outright; the processor is never run on them.
    if event:
        # The processor returns a (name, graph) pair; only the graph matters here.
        _, graph = processor(event)
        if graph:
            ids[index] = event.event_id
        # Stop scanning as soon as the requested number of events is collected.
        if len(ids) == NUM_EVTS:
            break
# Drop this cell's reference to the file object.
del f

### Extract raw arrays
Loop over datasets in input HDF5 file, and extract arrays for the events selected above

In [None]:
# Open the input explicitly read-only: this cell only extracts data, and the
# explicit mode avoids depending on the h5py version's default.
with h5py.File(FILE, "r") as f:

    # initialize empty array dictionary, keyed by "<group>/<dataset>"
    keys = []
    arrays = {}
    for group in f.keys():
        for key in f[group].keys():
            # ".seq" datasets are only used for row selection below, not copied
            if ".seq" in key:
                continue
            keys.append(f"{group}/{key}")
            arrays[keys[-1]] = []

    # Read the first column of each group's "event_id.seq" dataset once up
    # front, rather than re-reading the full dataset for every selected event.
    seq = {g: f[g]["event_id.seq"][()][:, 0] for g in f.keys()}

    # read arrays from file
    for i in tqdm.tqdm(ids):
        # per-group row indices whose sequence value matches event index i
        mask = {g: (s == i).nonzero() for g, s in seq.items()}
        for key in keys:
            g, _ = key.split("/")
            # one array of matching rows per selected event, in selection order
            arrays[key].append(f[key][mask[g]])

### Write to summary file
Combine arrays across summary events, and write them to a new summary file

In [None]:
# Output path: the input path with its last 7 characters (".evt.h5" for the
# configured input) replaced by ".test.h5".
summary_path = FILE[:-7] + ".test.h5"
with h5py.File(summary_path, "w") as f:
    # One dataset per "<group>/<dataset>" key, holding the per-event arrays
    # joined end to end in the order the events were selected.
    for key, chunks in arrays.items():
        f[key] = np.concatenate(chunks)