# root_numpy
`root_numpy` lets you read and write ROOT files that hold data in simple formats.

In [1]:
import numpy as np
import pandas as pd
import root_numpy

# Reading .root

In [2]:
# Example input file used by every reading cell below; the bare file name is
# resolved relative to the notebook's working directory.
ROOT_EXAMPLE_FILE = "public_train_10000.root"

List the trees in a ROOT file and keep the name of the first one.

In [3]:
# Take the first entry returned by list_trees and keep it as the tree name
# that is passed to list_branches in the next cell.
treename = root_numpy.list_trees(ROOT_EXAMPLE_FILE)[0]

List the branches of that tree.

In [4]:
# Show the branch names of the tree selected in the previous cell.
root_numpy.list_branches(ROOT_EXAMPLE_FILE, treename=treename)

['event_id',
 'target',
 'lepton_pt',
 'lepton_eta',
 'lepton_phi',
 'mem_pt',
 'mem_phi',
 'jet1_pt',
 'jet1_eta',
 'jet1_phi',
 'jet1_btag',
 'jet2_pt',
 'jet2_eta',
 'jet2_phi',
 'jet2_btag',
 'jet3_pt',
 'jet3_eta',
 'jet3_phi',
 'jet3_btag',
 'jet4_pt',
 'jet4_eta',
 'jet4_phi',
 'jet4_btag',
 'm_jj',
 'm_jjj',
 'm_lv',
 'm_jlv',
 'm_bb',
 'm_wbb',
 'm_wwbb']

## Main function: reading a ROOT file

In [7]:
# Read the file into memory; the result is reused by the inspection cells below.
# NOTE(review): no treename is passed here -- presumably root_numpy falls back
# to the file's only tree; confirm against the root_numpy documentation.
root2array_example = root_numpy.root2array(ROOT_EXAMPLE_FILE)

In [8]:
# Field names of the loaded array; they are the same names, in the same order,
# as the branches listed earlier.
root2array_example.dtype.names

('event_id',
 'target',
 'lepton_pt',
 'lepton_eta',
 'lepton_phi',
 'mem_pt',
 'mem_phi',
 'jet1_pt',
 'jet1_eta',
 'jet1_phi',
 'jet1_btag',
 'jet2_pt',
 'jet2_eta',
 'jet2_phi',
 'jet2_btag',
 'jet3_pt',
 'jet3_eta',
 'jet3_phi',
 'jet3_btag',
 'jet4_pt',
 'jet4_eta',
 'jet4_phi',
 'jet4_btag',
 'm_jj',
 'm_jjj',
 'm_lv',
 'm_jlv',
 'm_bb',
 'm_wbb',
 'm_wwbb')

In [9]:
# The loaded object is an ordinary NumPy array.
type(root2array_example)

numpy.ndarray

In [10]:
# The array is one-dimensional and holds 10000 entries
root2array_example.shape

(10000,)

In [11]:
# Each entry is a single record holding one value per field
root2array_example[0]

(1000001, 1.0, 34.75056838989258, 0.7870253324508667, 1.8988913297653198, 20.86243438720703, -2.6229982376098633, 44.81414794921875, -0.16817058622837067, 2.631594657897949, 2.0000228881835938, 57.68952178955078, -0.16162846982479095, -0.6820473074913025, 0.0, 71.9542007446289, 1.1542326211929321, -2.858513116836548, 2.0000159740448, 79.94803619384766, 0.8774719834327698, -0.25673550367355347, 0.0, 81.72444915771484, 189.58314514160156, 80.11831665039062, 170.08607482910156, 91.12820434570312, 298.4687805175781, 374.6857604980469)

In [9]:
# The record fields correspond to the branches, each with its own type
root2array_example.dtype

dtype([('event_id', '<i8'), ('target', '<f4'), ('lepton_pt', '<f4'), ('lepton_eta', '<f4'), ('lepton_phi', '<f4'), ('mem_pt', '<f4'), ('mem_phi', '<f4'), ('jet1_pt', '<f4'), ('jet1_eta', '<f4'), ('jet1_phi', '<f4'), ('jet1_btag', '<f4'), ('jet2_pt', '<f4'), ('jet2_eta', '<f4'), ('jet2_phi', '<f4'), ('jet2_btag', '<f4'), ('jet3_pt', '<f4'), ('jet3_eta', '<f4'), ('jet3_phi', '<f4'), ('jet3_btag', '<f4'), ('jet4_pt', '<f4'), ('jet4_eta', '<f4'), ('jet4_phi', '<f4'), ('jet4_btag', '<f4'), ('m_jj', '<f4'), ('m_jjj', '<f4'), ('m_lv', '<f4'), ('m_jlv', '<f4'), ('m_bb', '<f4'), ('m_wbb', '<f4'), ('m_wwbb', '<f4')])

In [10]:
# Reading can be limited to specific branches; only the requested fields
# appear in the resulting array
root_numpy.root2array(ROOT_EXAMPLE_FILE, branches=("event_id", "m_lv"))

array([(1000001, 80.11831665039062), (1000002, 83.3215560913086),
       (1000003, 95.30760192871094), ..., (1009998, 81.17916107177734),
       (1009999, 85.55708312988281), (1010000, 78.98086547851562)], 
      dtype=[('event_id', '<i8'), ('m_lv', '<f4')])

In [44]:
# Reading can also be limited to a range of entries: start=100 and stop=500
# yield 400 entries
root_numpy.root2array(ROOT_EXAMPLE_FILE, start=100, stop=500).shape

(400,)

Converting a structured array to a regular (unstructured) array

In [48]:
# pandas (covered in more detail shortly) is imported in the notebook's first
# cell together with the other dependencies.
# A DataFrame built from the structured array gets one column per field, and
# .values returns those columns as a plain 2-D array. All columns end up in a
# single common dtype (float32 in the output below), including the integer
# event_id field.
pd.DataFrame(root2array_example).values

array([[  1.00000100e+06,   1.00000000e+00,   3.47505684e+01, ...,
          9.11282043e+01,   2.98468781e+02,   3.74685760e+02],
       [  1.00000200e+06,   1.00000000e+00,   5.42509270e+01, ...,
          6.71184845e+01,   2.87363983e+02,   5.27247559e+02],
       [  1.00000300e+06,   1.00000000e+00,   4.77460251e+01, ...,
          1.30389206e+02,   2.37879318e+02,   3.36058838e+02],
       ..., 
       [  1.00999800e+06,   1.00000000e+00,   1.03452408e+02, ...,
          1.28377182e+02,   3.80536560e+02,   5.51422424e+02],
       [  1.00999900e+06,   0.00000000e+00,   3.38559570e+01, ...,
          1.40375320e+02,   2.64731140e+02,   3.71888153e+02],
       [  1.01000000e+06,   0.00000000e+00,   5.01330376e+01, ...,
          1.34282303e+02,   3.77124817e+02,   4.87969818e+02]], dtype=float32)

# Writing .root

In [12]:
# numpy is imported in the notebook's first cell together with the other
# dependencies.
# Small 3x3 array of normally distributed values to write to a ROOT file.
# No seed is set, so the values differ on every run.
random_data = np.random.normal(size=(3, 3))
random_data

array([[-1.14916797,  0.46222215, -1.08987016],
       [ 1.37410833, -0.68674231, -0.4191999 ],
       [-1.39318508, -0.10149691, -0.08431961]])

In [14]:
# random_data is still a plain array without named fields at this point, so
# array2root has no branch names to work with and raises TypeError.
# The failure is the point of this cell: catch it and print the message so
# that "Restart & Run All" can continue past the demonstration.
try:
    root_numpy.array2root(random_data, "./random_data.root", mode="recreate")
except TypeError as error:
    print("{}: {}".format(type(error).__name__, error))

TypeError: object of type 'NoneType' has no len()

ROOT needs to know the branch names explicitly, so we set them by giving the array named fields.

In [16]:
# Reinterpret every row of three float64 values as one record with the named
# fields a, b and c; these names become the branch names when writing.
# A view is used instead of assigning to .dtype (in-place dtype assignment is
# discouraged by NumPy), and np.float64 replaces the np.float_ alias, which
# was removed in NumPy 2.0. The data and resulting layout are unchanged, and
# re-running the cell is harmless.
random_data = random_data.view(
    [("a", np.float64), ("b", np.float64), ("c", np.float64)]
)

Now saving works.

In [17]:
# Same call as the failing attempt above; with named fields on random_data it
# now completes without an error.
root_numpy.array2root(random_data, "./random_data.root", mode="recreate")

Verify

In [6]:
# Read the written file back: the three records and the a/b/c fields should
# match the values of random_data shown earlier.
root_numpy.root2array("./random_data.root")

array([(-1.1491679673658817, 0.4622221546089726, -1.0898701638474229),
       (1.3741083313516156, -0.6867423099439857, -0.4191998962807615),
       (-1.393185076553394, -0.10149691040162417, -0.08431960814633446)], 
      dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])