Skip to content
This repository has been archived by the owner on Jan 9, 2023. It is now read-only.

Commit

Permalink
Add initial package
Browse files Browse the repository at this point in the history
  • Loading branch information
ibab committed Jan 16, 2015
1 parent 54874fe commit 6f66502
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 0 deletions.
7 changes: 7 additions & 0 deletions LICENSE
@@ -0,0 +1,7 @@
Copyright (c) 2015 Igor Babuschkin

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
109 changes: 109 additions & 0 deletions root_pandas/__init__.py
@@ -0,0 +1,109 @@

"""
A module that extends pandas to support the ROOT data format.
"""

from pandas import DataFrame
from root_numpy import root2array, list_trees
from fnmatch import fnmatch
from root_numpy import list_branches

# Names exported by `from root_pandas import *`. `to_root` is not listed here;
# it is exposed as a DataFrame method by the assignment at the end of this module.
__all__ = ['read_root']

def get_matching_variables(fname, tree, patterns):
    """
    Return the branches of a tree whose names match any of the given patterns.

    Parameters
    ----------
    fname: string
        The filename of the root file
    tree: string
        The name of the tree whose branches are inspected
    patterns: sequence
        A sequence of shell-patterns (as understood by fnmatch)

    Returns
    -------
    A list of branch names without duplicates. Names are ordered by the
    first pattern that matched them, then by their position in the branch
    listing returned by root_numpy's list_branches.
    """
    available = list_branches(fname, tree)
    matched = []

    for pattern in patterns:
        hits = [name for name in available if fnmatch(name, pattern)]
        for name in hits:
            # A branch matched by an earlier pattern is only reported once.
            if name in matched:
                continue
            matched.append(name)

    return matched

def read_root(fname, tree_name=None, variables=None, ignore=None, *kargs, **kwargs):
    """
    Read a ROOT file into a pandas DataFrame.

    Further *kargs and **kwargs are passed to root_numpy's root2array.
    If the tree contains a branch called 'index', it will become the
    DataFrame's index.

    Parameters
    ----------
    fname: string
        The filename of the root file
    tree_name: string
        The name of the tree to load. May be omitted if the file contains
        exactly one tree.
    variables: sequence
        A sequence of shell-patterns. Matching variables are read.
        A single pattern string is also accepted. The argument is never
        modified.
    ignore: sequence
        A sequence of shell-patterns. All matching variables are ignored
        (overriding the variables argument). A single pattern string is
        also accepted.

    Returns
    -------
    DataFrame from the ROOT file

    Raises
    ------
    ValueError
        If tree_name is omitted and the file does not contain exactly one
        tree, or if the ignore patterns match the 'index' branch.

    Notes
    -----
    >>> df = read_root('test.root', 'MyTree', variables=['x_*', 'y_*'], selection='x_1 > 100')
    """
    if not tree_name:
        trees = list_trees(fname)
        if len(trees) == 1:
            tree_name = trees[0]
        elif not trees:
            raise ValueError('No trees found in {}'.format(fname))
        else:
            raise ValueError('More than one tree found in {}'.format(fname))

    # Treat a bare string as one pattern rather than a sequence of characters.
    if isinstance(variables, str):
        variables = [variables]
    if isinstance(ignore, str):
        ignore = [ignore]

    if not variables:
        all_vars = None
    else:
        # index is always loaded if it exists. Build a new list so the
        # caller's sequence is not mutated (and does not grow on every call).
        patterns = list(variables) + ['index']
        all_vars = get_matching_variables(fname, tree_name, patterns)

    if ignore:
        if not all_vars:
            all_vars = get_matching_variables(fname, tree_name, ['*'])

        ignored = get_matching_variables(fname, tree_name, ignore)
        if 'index' in ignored:
            raise ValueError('index variable is being ignored!')
        # Filter rather than list.remove(): an ignore pattern may match
        # branches that were never selected by `variables`, and remove()
        # would raise ValueError for those.
        ignored = set(ignored)
        all_vars = [var for var in all_vars if var not in ignored]

    arr = root2array(fname, tree_name, all_vars, *kargs, **kwargs)
    if 'index' in arr.dtype.names:
        df = DataFrame.from_records(arr, index='index')
    else:
        df = DataFrame.from_records(arr)
    return df

def to_root(df, fname, tree_name="default", *kargs, **kwargs):
    """
    Save a DataFrame as a tree in a ROOT file.

    The frame is converted with DataFrame.to_records(), so its index is
    written as a branch as well (called 'index' for a default, unnamed
    index).

    Parameters
    ----------
    df: DataFrame
        The DataFrame to write (this is `self` when called as df.to_root)
    fname: string
        File path to new ROOT file (will be overwritten)
    tree_name: string
        Name of tree that the DataFrame will be saved as

    Notes
    -----
    Further *kargs and **kwargs are passed to root_numpy's array2root.
    The file is always opened in 'recreate' mode.

    >>> df = DataFrame({'x': [1,2,3], 'y': [4,5,6]})
    >>> df.to_root('test.root')
    """
    # Imported here rather than at module level, as in the original code.
    from root_numpy import array2root

    array2root(df.to_records(), fname, tree_name, 'recreate', *kargs, **kwargs)

# Patch pandas DataFrame to support a to_root method: after this module is
# imported, any DataFrame can be written with df.to_root(fname, ...).
# This runs at import time and modifies the DataFrame class itself.
DataFrame.to_root = to_root

11 changes: 11 additions & 0 deletions setup.py
@@ -0,0 +1,11 @@
from setuptools import setup

# Distribution metadata for the root_pandas package.
setup(
    name='root_pandas',
    version='0.1',
    description='Read and save DataFrames from and to ROOT files',
    url='http://github.com/ibab/root_pandas',
    author='Igor Babuschkin',
    author_email='igor@babuschk.in',
    license='MIT',
    # Only the single top-level package is shipped.
    packages=['root_pandas'],
    # Install as a regular directory rather than a zipped egg.
    zip_safe=False,
)

0 comments on commit 6f66502

Please sign in to comment.