In [545]:
import types
import inspect
import traceback
from rootpy.tree import IntCol, FloatCol, BoolCol
import numpy as np
from collections import OrderedDict
from rootpy.tree import TreeBuffer
from pax.data_model import StrictModel,Model,ListField
from rootpy.tree import TreeModel
from pax.utils import Memoize
import json
import bson
import rootpy.stl as stl
import cppyy
casting_allowed_for = {
    int:    ['int16', 'int32', 'int64', 'Int64', 'Int32'],
    float:  ['int', 'float32', 'float64', 'int16', 'int32', 'int64', 'Int64', 'Int32'],
    bool:   ['int16', 'int32', 'int64', 'Int64', 'Int32'],
    stl.string:    ['str'],
    ROOT.vector(int): ['list'],
    ROOT.vector(float): ['list'],
    ROOT.vector(bool): ['list'],
    ROOT.vector(stl.string): ['list']
}

    
class PaxTreeBuffer(TreeBuffer):
    def __post_init__(self, kwargs_dict=None, quick_init=False, **kwargs):
        if quick_init:
            self.__dict__.update(kwargs_dict)
            self.__dict__.update(kwargs)
            return

        # Initialize the collection fields to empty lists
        # super() is needed to bypass type checking in StrictModel
        list_field_info = self.get_list_field_info()
        for field_name in list_field_info:
            super(TreeBuffer).__setattr__(field_name, [])

        # Initialize all attributes from kwargs and kwargs_dict
        kwargs.update(kwargs_dict or {})
        for k, v in kwargs.items():
            if k in list_field_info:
                # User gave a value to initialize a list field. Hopefully an iterable!
                # Let's check if the types are correct
                desired_type = list_field_info[k]
                temp_list = []
                for el in v:
                    if isinstance(el, desired_type):
                        # Good, pass through unmolested
                        temp_list.append(el)
                    elif isinstance(el, dict):
                        # Dicts are fine too, we can use them to init the desired type
                        temp_list.append(desired_type(**el))
                    else:
                        raise ValueError("Attempt to initialize list field %s with type %s, "
                                         "but you promised type %s in class declaration." % (k,
                                                                                             type(el),
                                                                                             desired_type))
                # This has to be a list of dictionaries
                # suitable to be passed to __init__ of the list field's element type
                setattr(self, k, temp_list)
            else:
                default_value = getattr(self, k)
                if type(default_value) == np.ndarray:
                    if isinstance(v, np.ndarray):
                        pass
                    elif isinstance(v, bytes):
                        # Numpy arrays can be also initialized from a 'string' of bytes...
                        v = np.fromstring(v, dtype=default_value.dtype)
                    elif hasattr(v, '__iter__'):
                        # ... or an iterable
                        v = np.array(v, dtype=default_value.dtype)
                    else:
                        raise ValueError("Can't initialize field %s: "
                                         "don't know how to make a numpy array from a %s" % (k, type(v)))
                elif isinstance(default_value, Model):
                    v = default_value.__class__(**v)
                elif isinstance(v, stl.string): 
                    v = str(v)                    
                elif isinstance(v, stl.vector(stl.string)):
                    if not v.empty():
                        v = [str(el) for el in v]
                    else:
                        v = []  
                elif isinstance(v, (stl.vector(int),
                                   stl.vector(float),
                                   stl.vector(bool)
                               )):
                    if not v.empty():
                        v = list(v)
                    else:
                        v = []

                setattr(self, k, v)

    def to_json(self, fields_to_ignore=None):
        return json.dumps(self.to_dict(convert_numpy_arrays_to='list',
                                       fields_to_ignore=fields_to_ignore)
#                           , cls=ROOTJSONEncoder
                         )

    def to_bson(self, fields_to_ignore=None):
        return bson.BSON.encode(self.to_dict(convert_numpy_arrays_to='bytes',
                                             fields_to_ignore=fields_to_ignore))

    def __setattr__(self, key, value): 
        # Get the old attr.
        # #Will raise AttributeError if doesn't exists, which is what we want
        if(key.startswith('_')): # TreeBuffer attributes for the ROOT tree
            super().__setattr__(key, value)
            
        # model fields    
        old_val = getattr(self, key)
        old_type = type(old_val)
        new_type = type(value)
        # Check for attempted type change
        if old_type != new_type:

            # Are we allowed to cast the type?
            if old_type in casting_allowed_for \
                    and value.__class__.__name__ in casting_allowed_for[old_type]:
                if(old_type in [stl.vector(int), stl.vector(bool), stl.vector(float), stl.vector(stl.string)]):
                    prev_value = value
                    value=old_type()
                    for el in prev_value: # TODO: more efficient way to populate the stl.vector
                          value.push_back(str(el) if(old_type==stl.vector(stl.string)) else el)
                else:    
                    value = old_type(value)
            else:
                raise TypeError('Attribute %s of class %s should be a %s, not a %s. '
                                % (key,
                                   self.__class__.__name__,
                                   old_val.__class__.__name__,
                                   value.__class__.__name__))

        # Check for attempted dtype change
        if isinstance(old_val, np.ndarray):
            if old_val.dtype != value.dtype:
                raise TypeError('Attribute %s of class %s should have numpy dtype %s, not %s' % (
                    key, self.__class__.__name__, old_val.dtype, value.dtype))
                
        super().__setattr__(key, value)
    
                
    @classmethod        # Use only in initialization (or if attributes are fixed, as for StrictModel)
    @Memoize            # Caching decorator, improves performance if a model is initialized often
    def get_list_field_info(cls):
        """Return dict with fielname => type of elements in collection fields in this class
        """
        list_field_info = {}
        for k, v in cls.__dict__.items():
            if isinstance(v, ListField):
                list_field_info[k] = v.element_type
        return list_field_info
    
    def __str__(self): #TODO change
        return str(self.__dict__['_OrderedDict__map'])
    
    def get_fields_data(self):
        """Iterator over (key, value) tuples of all user-specified fields
        Returns keys in lexical order
        """
        # TODO: increase performance by pre-sorting keys?
        # self.__dict__.items() does not return default values set in class declaration
        # Hence we need something more complicated
        class_dict = self.__dict__['_OrderedDict__map']
        self_dict = self.__dict__
        
        for field_name in sorted(class_dict.keys()):
            if field_name in self_dict:
                # The instance has a value for this field: return it
                yield (field_name, self_dict[field_name])
            else:
                # ... it doesnt. Should we return its value?
                if field_name.startswith('_'):
                    continue    # No, is internal
                value_in_class = self.__getattr__(field_name) #TODO: or __getitem (returns wrapper object)
                if callable(value_in_class):
                    continue    # No, is a method
                if isinstance(value_in_class, (property, classmethod)):
                    continue    # No, is a property or classmethod
                # Yes, yield the class-level value
                yield (field_name, value_in_class)
    
    def to_dict(self, convert_numpy_arrays_to=None, fields_to_ignore=None):
        result = {}
        if fields_to_ignore is None:
            fields_to_ignore = tuple()
        for k, v in self.get_fields_data():
            if k in fields_to_ignore:
                continue
            if isinstance(v, Model):
                result[k] = v.to_dict(convert_numpy_arrays_to=convert_numpy_arrays_to,
                                      fields_to_ignore=fields_to_ignore)
            elif isinstance(v, list):
                result[k] = [el.to_dict(convert_numpy_arrays_to=convert_numpy_arrays_to,
                                        fields_to_ignore=fields_to_ignore) for el in v]
            # dealing with ROOT stl.string fields
            elif isinstance(v, stl.string):
                result[k] = str(v)
            # dealing with ROOT.vectors
            elif isinstance(v, (stl.vector(int),
                               stl.vector(float),
                               stl.vector(bool)
                               )):
                if not v.empty():
                    result[k] = list(v)
                else:
                    result[k] = []
            elif isinstance(v, stl.vector(stl.string)):
                if not v.empty():
                    result[k] = [str(el) for el in v]
                else:
                    result[k] = []
            elif isinstance(v, np.ndarray) and convert_numpy_arrays_to is not None:
                if convert_numpy_arrays_to == 'list':
                    result[k] = v.tolist()
                elif convert_numpy_arrays_to == 'bytes':
                    result[k] = v.tostring()
                else:
                    raise ValueError('convert_numpy_arrays_to must be "list" or "bytes"')
            else:
                result[k] = v
        return result            

    
class ROOTModel(StrictModel):
    def __new__(cls, kwargs_dict=None, quick_init=False, **kwargs):
        treebuffer = PaxTreeBuffer()
        for name, attr in cls.get_attrs():
            treebuffer[name] = attr()

        treebuffer.__post_init__(kwargs_dict, quick_init, **kwargs) # We need TreeBuffer to have to_dict, to_json...   
        return treebuffer
        
EventModel = type("EventModel", (ROOTModel, TreeModel,), {'__new__':ROOTModel.__new__})
    
class Event(EventModel):
    int_attribute = IntCol(5)
    float_attribute = FloatCol(0.0)
    boolean_attribute = BoolCol(False)
    string_attribute = stl.string
    vector_attribute = stl.vector(int)
    vector_strings = stl.vector(stl.string)

In [546]:
# you can use it the "pax way"
event = Event( kwargs_dict={'int_attribute':222,'float_attribute':12.34,'string_attribute':'one_detector','vector_attribute':[1,2,3],'vector_strings':['one','two']}, quick_init=False) 

In [547]:
# or just populate some fields, and leave defaults elsewhere
event1 = Event(kwargs_dict={'float_attribute':12.34,'vector_attribute':[1,2,3]}, quick_init=False)

In [548]:
# or don't populate
event2 = Event()

In [549]:
# inheritancce tree
inspect.getmro(Event)

(string_attribute --> <class 'cppyy.string'>
 vector_attribute --> <class 'cppyy.vector<int>'>
 vector_strings --> <class 'cppyy.vector<string>'>
 int_attribute --> IntCol(5)
 float_attribute --> FloatCol(0.0)
 boolean_attribute --> BoolCol(False),
 ,
 __main__.ROOTModel,
 ,
 pax.data_model.StrictModel,
 pax.data_model.Model,
 object)

In [550]:
dir(EventModel) 
# the problem is, once you instantiate the default TreeModel of rootpy, 
# it returns a TreeBuffer object instead, so the Event functionality is lost (to_dict, to_json, get_fields ...)  
# And this one allows to use the Event data structure as a tree at the same time

['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 'from_bson',
 'from_json',
 'get_fields_data',
 'get_list_field_info',
 'to_bson',
 'to_dict',
 'to_json']

In [553]:
event.to_dict() # the vector, and the stl.string, we needed a way to serialize it also

{'boolean_attribute': 0,
 'float_attribute': 12.34000015258789,
 'int_attribute': 222,
 'string_attribute': 'one_detector',
 'vector_attribute': [1, 2, 3],
 'vector_strings': ['one', 'two']}

In [554]:
event1.to_dict()

{'boolean_attribute': 0,
 'float_attribute': 12.34000015258789,
 'int_attribute': 5,
 'string_attribute': '',
 'vector_attribute': [1, 2, 3],
 'vector_strings': []}

In [555]:
event2.to_dict() # default

{'boolean_attribute': 0,
 'float_attribute': 0.0,
 'int_attribute': 5,
 'string_attribute': '',
 'vector_attribute': [],
 'vector_strings': []}

In [556]:
event.vector_strings.push_back('three')
event.vector_attribute.push_back(65)
event.to_dict()

{'boolean_attribute': 0,
 'float_attribute': 12.34000015258789,
 'int_attribute': 222,
 'string_attribute': 'one_detector',
 'vector_attribute': [1, 2, 3, 65],
 'vector_strings': ['one', 'two', 'three']}

In [557]:
Event

string_attribute --> <class 'cppyy.string'>
vector_attribute --> <class 'cppyy.vector<int>'>
vector_strings --> <class 'cppyy.vector<string>'>
int_attribute --> IntCol(5)
float_attribute --> FloatCol(0.0)
boolean_attribute --> BoolCol(False)

In [558]:
print(event) #TODO change maybe

{'boolean_attribute': <collections._Link object at 0x7f98fb7eb828>, 'float_attribute': <collections._Link object at 0x7f98fb7eb7e0>, 'vector_attribute': <collections._Link object at 0x7f98fb7eb120>, 'int_attribute': <collections._Link object at 0x7f98fb7ebd80>, 'vector_strings': <collections._Link object at 0x7f98fb7eb4c8>, 'string_attribute': <collections._Link object at 0x7f98fb7eb240>}


In [559]:
event.some_attribute = 34 # NOPE, cannot do, should not do, there is no "some_attribute"; 
# problem (with the error printing PaxTreeBuffer instance instead of Event one) is, 
#upon instantiation of Event, it returns an instance of TreeBuffer, so the information about Event class is lost

AttributeError: PaxTreeBuffer instance has no attribute `some_attribute`

In [560]:
event.__getattr__('float_attribute') # class_dict

12.34000015258789

In [561]:
event.__getitem__('float_attribute') # the ROOOT branch object

Float(12.34000015258789) at 0x7f98fb779be8

In [562]:
Event.float_attribute  # The attribute type

FloatCol(0.0)

In [563]:
event.__dict__

{'_OrderedDict__hardroot': <collections._Link at 0x7f9913a9dee8>,
 '_OrderedDict__map': {'boolean_attribute': <collections._Link at 0x7f98fb7eb828>,
  'float_attribute': <collections._Link at 0x7f98fb7eb7e0>,
  'int_attribute': <collections._Link at 0x7f98fb7ebd80>,
  'string_attribute': <collections._Link at 0x7f98fb7eb240>,
  'vector_attribute': <collections._Link at 0x7f98fb7eb120>,
  'vector_strings': <collections._Link at 0x7f98fb7eb4c8>},
 '_OrderedDict__root': <collections._Link at 0x7f98fb7b5138>,
 '_branch_cache': {},
 '_branch_cache_event': {},
 '_collections': {},
 '_current_entry': 0,
 '_entry': Int(0) at 0x7f98fb81d5e8,
 '_fixed_names': {},
 '_ignore_unsupported': False,
 '_inited': True,
 '_objects': [],
 '_tree': None}

In [564]:
event.to_json() # Good.

'{"vector_strings": ["one", "two", "three"], "float_attribute": 12.34000015258789, "vector_attribute": [1, 2, 3, 65], "int_attribute": 222, "boolean_attribute": 0, "string_attribute": "one_detector"}'

In [565]:
event.to_dict()

{'boolean_attribute': 0,
 'float_attribute': 12.34000015258789,
 'int_attribute': 222,
 'string_attribute': 'one_detector',
 'vector_attribute': [1, 2, 3, 65],
 'vector_strings': ['one', 'two', 'three']}

In [566]:
event.get_list_field_info() # this will not be needed, we're using vectors with rootpy

{}

In [567]:
# json back and forth
json_event = event.to_json() 
print(json_event)
event_1 = Event.from_json(json_event)
event_1.to_dict() 

{"vector_strings": ["one", "two", "three"], "float_attribute": 12.34000015258789, "vector_attribute": [1, 2, 3, 65], "int_attribute": 222, "boolean_attribute": 0, "string_attribute": "one_detector"}


{'boolean_attribute': 0,
 'float_attribute': 12.34000015258789,
 'int_attribute': 222,
 'string_attribute': 'one_detector',
 'vector_attribute': [1, 2, 3, 65],
 'vector_strings': ['one', 'two', 'three']}

In [568]:
# bson back and forth
bson_event = event.to_bson()
print(bson_event)
event_2 = Event.from_bson(bson_event)
event_2.to_dict() 

b'\xd6\x00\x00\x00\x04vector_strings\x00(\x00\x00\x00\x020\x00\x04\x00\x00\x00one\x00\x021\x00\x04\x00\x00\x00two\x00\x022\x00\x06\x00\x00\x00three\x00\x00\x01float_attribute\x00\x00\x00\x00\x80\x14\xae(@\x04vector_attribute\x00!\x00\x00\x00\x100\x00\x01\x00\x00\x00\x101\x00\x02\x00\x00\x00\x102\x00\x03\x00\x00\x00\x103\x00A\x00\x00\x00\x00\x10int_attribute\x00\xde\x00\x00\x00\x10boolean_attribute\x00\x00\x00\x00\x00\x02string_attribute\x00\r\x00\x00\x00one_detector\x00\x00'


{'boolean_attribute': 0,
 'float_attribute': 12.34000015258789,
 'int_attribute': 222,
 'string_attribute': 'one_detector',
 'vector_attribute': [1, 2, 3, 65],
 'vector_strings': ['one', 'two', 'three']}

In [569]:
json_event = event.to_json(fields_to_ignore=['float_attribute','vector_strings']) 
print(json_event)

{"boolean_attribute": 0, "string_attribute": "one_detector", "vector_attribute": [1, 2, 3, 65], "int_attribute": 222}


In [570]:
# usse the model to write a ROOT tree
from rootpy.tree import Tree, Ntuple, TreeModel, TreeChain
from rootpy.tree.treetypes import *
from rootpy.tree.tree import *
from rootpy.tree.model import *
import ROOT
import rootpy.stl as stl

from rootpy.io import root_open, TemporaryFile
f = root_open("test_pax_rootpy_new.root", "recreate")
tree_event = Tree("treeEvent", model=Event)
tree_event.boolean_attribute = True
tree_event.string_attribute = "sdfdsdf"
tree_event.int_attribute = 66
tree_event.vector_strings.push_back("vector1")
tree_event.vector_strings.push_back("vector2")
tree_event.fill()
tree_event.write()
f.write()

for branch in tree_event.GetListOfBranches():
    print(branch)

<ROOT.TBranchElement object ("string_attribute") at 0x9c02190>
<ROOT.TBranchElement object ("vector_attribute") at 0x8c25ac0>
<ROOT.TBranchElement object ("vector_strings") at 0x9954a50>
<ROOT.TBranch object ("int_attribute") at 0x78462f0>
<ROOT.TBranch object ("float_attribute") at 0x9897650>
<ROOT.TBranch object ("boolean_attribute") at 0xb9275a0>


In [571]:
tree_event.Scan('*')

2

In [572]:
for branch in tree_event.GetListOfBranches():
    print(branch)

<ROOT.TBranchElement object ("string_attribute") at 0x9c02190>
<ROOT.TBranchElement object ("vector_attribute") at 0x8c25ac0>
<ROOT.TBranchElement object ("vector_strings") at 0x9954a50>
<ROOT.TBranch object ("int_attribute") at 0x78462f0>
<ROOT.TBranch object ("float_attribute") at 0x9897650>
<ROOT.TBranch object ("boolean_attribute") at 0xb9275a0>


In [573]:
tree_event.Scan("int_attribute:vector_strings","int_attribute>2")

2

In [574]:
ROOT.gROOT.GetVersion()

'6.04/03'