In [264]:
from tau_profile_parser import TauProfileParser
import hatchet as ht

### Reading data with the profile parser

In [265]:
# Parse the TAU profile identified by 'lulesh_profile'; the result exposes
# interval_data(), which the next cell uses.
lulesh_data = TauProfileParser.parse('lulesh_profile')

### Extracting interval data

In [282]:
# Interval (timer) data. CallPaths below reads its 'Group', 'Exclusive' and
# 'Inclusive' columns and selects rows with .loc[node, context, thread].
data = lulesh_data.interval_data()

### Parsing Call Path data

The callpath data is in the `TAU_CALLPATH` group, so we filter our dataframe to give us only the callpaths.
Further, the calls in the callpath are joined by ` => `, so we split them into a hierarchical index, and construct a tree from the generated multiindexed dataframe.

In [283]:
class Node():
    """
    Abstract node of a tree that will be passed to the hatchet API.

    ``Node`` cannot be instantiated directly. Subclasses provide their own
    ``__init__`` and call ``_initialize`` to set the name and metrics.
    """
    def __init__(self, name, kwargs):
        """
        Always raises NotImplementedError, because Node is abstract.
        """
        # NotImplementedError is the exception class. ``NotImplemented`` is a
        # non-callable constant, so "raising" it produces a TypeError instead.
        raise NotImplementedError("Node is abstract")

    def to_dict(self):
        """
        Hatchet expects a dictionary representation of each node with at least
        the keys `name` (string) and `metrics` (dict).
        An example of metrics would be: {"inclusive_time": 10.0, "exclusive_time": 9.0}

        Returns: dict with the keys "name" and "metrics".
        """
        return {"name": self._name, "metrics": self._metrics}

    def _initialize(self, name, kwargs):
        """
        Shared constructor logic, called by subclasses.

        name: str; name of the timer
        kwargs: dict; metrics of the node, stored as-is (not copied)
        """
        self._name = name
        self._metrics = kwargs
            
class LeafNode(Node):
    """
    A plain node: it carries a name and metrics and tracks no children.
    """
    def __init__(self, name, **kwargs):
        """
        name: str; name of the timer
        kwargs: metrics of the node, collected into a dict
        """
        super()._initialize(name, kwargs)
            
class InnerNode(Node):
    """
    A node with children
    """
    def __init__(self, name, **kwargs):
        """
        name: str; name of the timer
        kwargs; metrics of the node
        """
        super(InnerNode, self)._initialize(name, kwargs)
        # A dict used as an insertion-ordered set: it still ignores a node
        # that is added twice, but iterates in the order children were added.
        # A plain set of identity-hashed nodes iterates in an order that can
        # change from run to run, which made to_dict() output unstable.
        self._children = {}

    def to_dict(self):
        """
        Hatchet expects the inner nodes of the tree to contain, on top of what
        a regular node contains, one extra field called `children`.
        Children are also nodes; each is serialized with its own to_dict().

        Returns: dict with the keys "name", "metrics" and "children" (a list,
        in the order the children were added).
        """
        return {
            "name": self._name,
            "metrics": self._metrics,
            "children": [child.to_dict() for child in self._children],
        }

    def add_children(self, node):
        """
        Register `node` as a child of this node.

        node: an object providing to_dict(); adding the same object again
        has no effect.
        """
        self._children[node] = None
        

class CallPaths():
    """
    Generates call paths that are understood by hatchet
    """
    def __init__(self, non_call_path_data, call_path_data):
        """
        Initializer should not be called directly; use the factory method
        from_tau_interval_profile() instead.

        non_call_path_data: DataFrame indexed by timer name, with 'Exclusive'
            and 'Inclusive' columns (used to look up each node's metrics)
        call_path_data: DataFrame whose index levels are the successive
            calls of each call path
        """
        self._roots = []
        self._non_call_path = non_call_path_data
        self._call_path_data = call_path_data
        # Use the argument, not a notebook-global `call_path`, so the class
        # works on a fresh kernel. `nlevels` is the number of index levels,
        # i.e. the deepest call path.
        self._depth = call_path_data.index.nlevels
        self._recursive_constructor(self._call_path_data, None, 0)

    def get_roots(self):
        """
        Creates a json-like structure (list of dictionaries) that is understood
        by the hatchet.GraphFrame.from_literal() method.
        """
        return [root.to_dict() for root in self._roots]

    def _recursive_constructor(self, call_path_data, parent_node, level):
        """
        Recursively builds the tree.

        call_path_data: the (sub-)frame whose first index level holds the
            functions at the current depth
        parent_node: InnerNode to attach the new nodes to, or None for roots
        level: int; current depth, 0 for the roots
        """
        functions_on_this_level = [
            name.strip() for name in call_path_data.groupby(level=0).groups.keys()
        ]
        for func in functions_on_this_level:
            # Metrics come from the flat (non call path) entry for this timer.
            timer = self._non_call_path.loc[func]
            metrics = {"exclusive_time": timer['Exclusive'],
                       "inclusive_time": timer['Inclusive']}

            if level == self._depth - 1 or func == 'NaN':
                node = LeafNode(func, **metrics)
            else:
                node = InnerNode(func, **metrics)
                # Descend into the rows below this function.
                self._recursive_constructor(call_path_data.loc[func], node, level + 1)

            if parent_node is not None:
                parent_node.add_children(node)
            else:
                self._roots.append(node)

    @staticmethod
    def _get_call_paths(data, node, context, thread):
        """
        Select the rows whose 'Group' contains 'TAU_CALLPATH' for the given
        node/context/thread and split their index on '=>' into a MultiIndex.
        """
        data = data[data['Group'].str.contains('TAU_CALLPATH', regex=False)].loc[node, context, thread]
        # Raw string: `\s` is a regex escape, not a Python string escape.
        data = data.set_index(data.index.str.split(r"\s*=>\s*", expand=True))
        return data

    @staticmethod
    def _get_non_call_paths(data, node, context, thread):
        """
        Select the rows whose 'Group' does not contain 'TAU_CALLPATH' for the
        given node/context/thread, with whitespace stripped from the index.
        """
        data = data[~data['Group'].str.contains('TAU_CALLPATH', regex=False)].loc[node, context, thread]
        data = data.set_index(data.index.str.strip())
        return data

    @staticmethod
    def from_tau_interval_profile(tau_interval, node, context, thread):
        """
        Creates and returns a CallPaths object

        tau_interval: pandas.DataFrame; the interval data from TauProfileParser
        node: int
        context: int
        thread: int
        """
        non_call_path_data = CallPaths._get_non_call_paths(tau_interval, node, context, thread)
        call_path_data = CallPaths._get_call_paths(tau_interval, node, context, thread)
        return CallPaths(non_call_path_data, call_path_data)
        

In [274]:
# Build the call-path tree for node 0, context 0, thread 0.
call_paths = CallPaths.from_tau_interval_profile(data, 0, 0, 0)

In [275]:
# Despite the name, this is a list: one nested dict (name/metrics/children)
# per root of the tree.
graph_dict = call_paths.get_roots()

In [276]:
# Hand the list of root dicts to hatchet to build a GraphFrame.
gf = ht.GraphFrame.from_literal(graph_dict)

In [277]:
# Print the tree rendering with each node annotated by its exclusive_time.
print(gf.tree(color=True, metric_column="exclusive_time"))

    __          __       __         __ 
   / /_  ____ _/ /______/ /_  ___  / /_
  / __ \/ __ `/ __/ ___/ __ \/ _ \/ __/
 / / / / /_/ / /_/ /__/ / / /  __/ /_  
/_/ /_/\__,_/\__/\___/_/ /_/\___/\__/  v1.2.0

[38;5;34m1396259.000[0m [48;5;246m[38;5;232m.TAU application[0m
└─ [38;5;22m0.000[0m [48;5;246m[38;5;232m[CONTEXT] .TAU application[0m
   ├─ [38;5;22m8003.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1341}][0m
   ├─ [38;5;22m2000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcEnerg...lulesh.cc} {2075}][0m
   ├─ [38;5;22m1005.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcMonot...lulesh.cc} {1936}][0m
   ├─ [38;5;22m4000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcPress...lulesh.cc} {2022}][0m
   ├─ [38;5;22m18000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcPress...lulesh.cc} {2029}][0m
   ├─ [38;5;22m851.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcQForE...lulesh.cc} {1995}][0m
   ├─ [38;5;22m22851.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcQForE...lules

In [278]:
# Same tree, annotated with inclusive_time instead.
print(gf.tree(color=True, metric_column="inclusive_time"))

    __          __       __         __ 
   / /_  ____ _/ /______/ /_  ___  / /_
  / __ \/ __ `/ __/ ___/ __ \/ _ \/ __/
 / / / / /_/ / /_/ /__/ / / /  __/ /_  
/_/ /_/\__,_/\__/\___/_/ /_/\___/\__/  v1.2.0

[38;5;196m12513680.000[0m [48;5;246m[38;5;232m.TAU application[0m
└─ [38;5;22m1211132.000[0m [48;5;246m[38;5;232m[CONTEXT] .TAU application[0m
   ├─ [38;5;22m8003.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1341}][0m
   ├─ [38;5;22m2000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcEnerg...lulesh.cc} {2075}][0m
   ├─ [38;5;22m1005.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcMonot...lulesh.cc} {1936}][0m
   ├─ [38;5;22m4000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcPress...lulesh.cc} {2022}][0m
   ├─ [38;5;22m18000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcPress...lulesh.cc} {2029}][0m
   ├─ [38;5;22m851.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcQForE...lulesh.cc} {1995}][0m
   ├─ [38;5;22m22851.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcQForE

In [279]:
# Show the GraphFrame's table: one row per node, with its name and both metrics.
gf.dataframe

Unnamed: 0_level_0,name,exclusive_time,inclusive_time
node,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
{'name': '.TAU application'},.TAU application,1396259,12513680
{'name': '[CONTEXT] .TAU application'},[CONTEXT] .TAU application,0,1211132
{'name': '[SAMPLE] CalcElemVolume [{/home/nchaimov/LULESH/lulesh.cc} {1341}]'},[SAMPLE] CalcElemVolume [{/home/nchaimov/LULES...,8003,8003
{'name': '[SAMPLE] CalcEnergyForElems [{/home/nchaimov/LULESH/lulesh.cc} {2075}]'},[SAMPLE] CalcEnergyForElems [{/home/nchaimov/L...,2000,2000
{'name': '[SAMPLE] CalcMonotonicQForElems [{/home/nchaimov/LULESH/lulesh.cc} {1936}]'},[SAMPLE] CalcMonotonicQForElems [{/home/nchaim...,1005,1005
...,...,...,...
"{'name': '[CONTEXT] OpenMP_Sync_Region_Barrier void Release<double>(double**) [{/home/nchaimov/LULESH/lulesh.h} {118, 0}]'}",[CONTEXT] OpenMP_Sync_Region_Barrier void Rele...,0,266608
{'name': '[SAMPLE] Tau_global_incr_insideTAU [{/home/nchaimov/tau2/src/Profile/TauCAPI.cpp} {309}]'},[SAMPLE] Tau_global_incr_insideTAU [{/home/nch...,9000,9000
{'name': '[SAMPLE] __GI___sched_yield [{} {0}]'},[SAMPLE] __GI___sched_yield [{} {0}],91250,91250
{'name': '[SAMPLE] __kmp_hardware_timestamp [{} {0}]'},[SAMPLE] __kmp_hardware_timestamp [{} {0}],1227608,1227608
