In [1]:
from tau_profile_parser import TauProfileParser
from tau_profile_callpath_parser import CallPaths
import pandas as pd
import numpy as np
import hatchet as ht
import re

### Reading data with Tau profile parser

In [2]:
# Parse the TAU profile named 'lulesh_profile' (a relative path, presumably
# resolved against the notebook's working directory) and keep the parsed
# result for the cells below.
lulesh_data = TauProfileParser.parse('lulesh_profile')

### Extracting interval data

In [3]:
# Extract the interval data from the parsed profile; the call paths in the
# next section are built from this object.
data = lulesh_data.interval_data()

### Parsing Call Path data

In [4]:
# Build call paths from the interval data for one location only:
# node 0, context 0, thread 0.
call_paths = CallPaths.from_tau_interval_profile(data, node=0, context=0, thread=0)

In [5]:
# Convert the call paths into a hatchet GraphFrame; every later cell works on
# `gf` through its tree(), filter() and dataframe members.
gf = call_paths.to_hatchet()

### Call Graph - Exclusive Time

In [6]:
# tree() renders the call tree as an ANSI-coloured string, so it is print()ed
# rather than left as the cell's bare repr. Each node is annotated with the
# "time" column (exclusive time, per the section heading).
print(gf.tree(metric_column="time"))

    __          __       __         __ 
   / /_  ____ _/ /______/ /_  ___  / /_
  / __ \/ __ `/ __/ ___/ __ \/ _ \/ __/
 / / / / /_/ / /_/ /__/ / / /  __/ /_  
/_/ /_/\__,_/\__/\___/_/ /_/\___/\__/  v1.2.0

[38;5;34m1396259.000[0m [48;5;246m[38;5;232m.TAU application[0m
└─ [38;5;22m0.000[0m [48;5;246m[38;5;232m[CONTEXT] .TAU application[0m
   ├─ [38;5;22m999.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1341}][0m
   ├─ [38;5;22m2000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcEnerg...lulesh.cc} {2075}][0m
   ├─ [38;5;22m1005.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcMonot...lulesh.cc} {1936}][0m
   ├─ [38;5;22m4000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcPress...lulesh.cc} {2022}][0m
   ├─ [38;5;22m18000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcPress...lulesh.cc} {2029}][0m
   ├─ [38;5;22m851.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcQForE...lulesh.cc} {1995}][0m
   ├─ [38;5;22m22851.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcQForE...lulesh

### Call Graph - Inclusive Time

In [7]:
# Same rendering as the previous section, but each node is annotated with the
# "time (inc)" column (inclusive time, per the section heading).
print(gf.tree(metric_column="time (inc)"))

    __          __       __         __ 
   / /_  ____ _/ /______/ /_  ___  / /_
  / __ \/ __ `/ __/ ___/ __ \/ _ \/ __/
 / / / / /_/ / /_/ /__/ / / /  __/ /_  
/_/ /_/\__,_/\__/\___/_/ /_/\___/\__/  v1.2.0

[38;5;34m2607391.000[0m [48;5;246m[38;5;232m.TAU application[0m
└─ [38;5;22m1211132.000[0m [48;5;246m[38;5;232m[CONTEXT] .TAU application[0m
   ├─ [38;5;22m999.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1341}][0m
   ├─ [38;5;22m2000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcEnerg...lulesh.cc} {2075}][0m
   ├─ [38;5;22m1005.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcMonot...lulesh.cc} {1936}][0m
   ├─ [38;5;22m4000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcPress...lulesh.cc} {2022}][0m
   ├─ [38;5;22m18000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcPress...lulesh.cc} {2029}][0m
   ├─ [38;5;22m851.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcQForE...lulesh.cc} {1995}][0m
   ├─ [38;5;22m22851.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcQForE...

### Pruning
We can prune the tree with the `filter` method of a GraphFrame object.  
For example:

In [8]:
# GraphFrame.filter expects either a lambda or a list of queries.
# A single-entry query with a 'name' regex matches every node whose name
# contains 'CalcElemVolume'.
query = [{'name': '.*CalcElemVolume.*'}]
tmp = gf.filter(query, squash=True)
print(tmp.tree(metric_column='time'))

    __          __       __         __ 
   / /_  ____ _/ /______/ /_  ___  / /_
  / __ \/ __ `/ __/ ___/ __ \/ _ \/ __/
 / / / / /_/ / /_/ /__/ / / /  __/ /_  
/_/ /_/\__,_/\__/\___/_/ /_/\___/\__/  v1.2.0

[38;5;34m999.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1291}][0m
[38;5;46m3002.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1295}][0m
[38;5;34m2002.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1297}][0m
[38;5;46m3004.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1298}][0m
[38;5;34m2002.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1301}][0m
[38;5;34m1000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1311}][0m
[38;5;34m1001.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1313}][0m
[38;5;34m998.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1315}][0m
[38;5;34m1001.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1322}

In [9]:
# Path query: a leading '*' wildcard followed by the name predicate. It finds
# every node with 'CalcElemVolume' in its name together with all the nodes
# above it.
query = ['*', ('*', {'name': '.*CalcElemVolume.*'})]
tmp = gf.filter(query, squash=True)
print(tmp.tree(metric_column='time'))

    __          __       __         __ 
   / /_  ____ _/ /______/ /_  ___  / /_
  / __ \/ __ `/ __/ ___/ __ \/ _ \/ __/
 / / / / /_/ / /_/ /__/ / / /  __/ /_  
/_/ /_/\__,_/\__/\___/_/ /_/\___/\__/  v1.2.0

[38;5;34m1396259.000[0m [48;5;246m[38;5;232m.TAU application[0m
└─ [38;5;22m0.000[0m [48;5;246m[38;5;232m[CONTEXT] .TAU application[0m
   └─ [38;5;22m999.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1341}][0m
[38;5;196m7493651.000[0m [48;5;246m[38;5;232mOpenMP_Implicit_Task[0m
└─ [38;5;22m0.000[0m [48;5;246m[38;5;232m[CONTEXT] OpenMP_Implicit_Task[0m
   ├─ [38;5;22m999.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1291}][0m
   ├─ [38;5;22m3002.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1295}][0m
   ├─ [38;5;22m2002.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1297}][0m
   ├─ [38;5;22m3004.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1298}][0m
   ├─ [38;5;22m2002.

In [10]:
# Path query: the name predicate followed by a trailing '*' wildcard. It finds
# every node with 'OpenMP_Parallel_Region' in its name together with its
# subtree.
query = [('*', {'name': '.*OpenMP_Parallel_Region.*'}), '*']
tmp = gf.filter(query, squash=True)
print(tmp.tree(metric_column='time'))

    __          __       __         __ 
   / /_  ____ _/ /______/ /_  ___  / /_
  / __ \/ __ `/ __/ ___/ __ \/ _ \/ __/
 / / / / /_/ / /_/ /__/ / / /  __/ /_  
/_/ /_/\__,_/\__/\___/_/ /_/\___/\__/  v1.2.0

[38;5;22m2953.000[0m [48;5;246m[38;5;232mOpenMP_Parallel_Re...esh.cc} {1157, 0}][0m
└─ [38;5;22m0.000[0m [48;5;246m[38;5;232m[CONTEXT] OpenMP_P...esh.cc} {1157, 0}][0m
   ├─ [38;5;22m1001.000[0m [48;5;246m[38;5;232m[SAMPLE] __kmp_for...int, int) [{} {0}][0m
   ├─ [38;5;22m1000.000[0m [48;5;246m[38;5;232m[SAMPLE] __kmp_for.../libiomp5.so} {0}][0m
   └─ [38;5;22m1001.000[0m [48;5;246m[38;5;232m[SAMPLE] __kmp_rel...cket_lock [{} {0}][0m
[38;5;34m22645.000[0m [48;5;246m[38;5;232mOpenMP_Parallel_Re...esh.cc} {2500, 0}][0m
└─ [38;5;22m0.000[0m [48;5;246m[38;5;232m[CONTEXT] OpenMP_P...esh.cc} {2500, 0}][0m
   ├─ [38;5;22m11000.000[0m [48;5;246m[38;5;232m[SAMPLE] __kmp_for...int, int) [{} {0}][0m
   ├─ [38;5;22m2000.000[0m [48;5;246m[38;5;232m[SAM

### Extracting the top 10 nodes by inclusive time, and from there the top 10 by exclusive time:

In [11]:
# Rank all rows by inclusive time (largest first), drop fully duplicated rows,
# and keep at most the first ten.
top_ten_inclusives = (
    gf.dataframe
    .sort_values(by=['time (inc)'], ascending=False)
    .drop_duplicates()
    .head(10)
)
# Smallest inclusive time among the kept rows; used as the filter threshold in
# the next cell. Use positional .iloc[-1] rather than [9]: the frame's index is
# not integer positions, so [9] relied on pandas' deprecated positional
# fallback, and it also failed whenever fewer than ten rows remained.
least_value_of_top_ten_inclusives = top_ten_inclusives['time (inc)'].iloc[-1]

In [12]:
# Keep nodes whose inclusive time reaches the top-ten threshold; the trailing
# '*' also keeps all children of these nodes.
inclusive_threshold = f'>= {least_value_of_top_ten_inclusives}'
query = [('*', {'time (inc)': inclusive_threshold}), '*']

top_ten_inclusives_gf = gf.filter(query, squash=True)
print(top_ten_inclusives_gf.tree(metric_column='time (inc)'))

    __          __       __         __ 
   / /_  ____ _/ /______/ /_  ___  / /_
  / __ \/ __ `/ __/ ___/ __ \/ _ \/ __/
 / / / / /_/ / /_/ /__/ / / /  __/ /_  
/_/ /_/\__,_/\__/\___/_/ /_/\___/\__/  v1.2.0

[38;5;34m2607391.000[0m [48;5;246m[38;5;232m.TAU application[0m
└─ [38;5;22m1211132.000[0m [48;5;246m[38;5;232m[CONTEXT] .TAU application[0m
   ├─ [38;5;22m999.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcElemV...lulesh.cc} {1341}][0m
   ├─ [38;5;22m2000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcEnerg...lulesh.cc} {2075}][0m
   ├─ [38;5;22m1005.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcMonot...lulesh.cc} {1936}][0m
   ├─ [38;5;22m4000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcPress...lulesh.cc} {2022}][0m
   ├─ [38;5;22m18000.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcPress...lulesh.cc} {2029}][0m
   ├─ [38;5;22m851.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcQForE...lulesh.cc} {1995}][0m
   ├─ [38;5;22m22851.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcQForE...

In [13]:
# Within the inclusive-time selection, rank rows by exclusive time (largest
# first), drop fully duplicated rows, and keep at most the first ten.
top_ten_exclusives = (
    top_ten_inclusives_gf.dataframe
    .sort_values(by=['time'], ascending=False)
    .drop_duplicates()
    .head(10)
)
# Smallest exclusive time among the kept rows; used as the filter threshold in
# the next cell. Use positional .iloc[-1] rather than [9]: the frame's index is
# not integer positions, so [9] relied on pandas' deprecated positional
# fallback, and it also failed whenever fewer than ten rows remained.
least_value_of_top_ten_exclusives = top_ten_exclusives['time'].iloc[-1]



In [14]:
# Keep nodes whose exclusive time reaches the top-ten threshold; the leading
# '*' also keeps every parent of these nodes.
exclusive_threshold = f'>= {least_value_of_top_ten_exclusives}'
query = ['*', ('*', {'time': exclusive_threshold})]

# NOTE(review): the threshold was computed from top_ten_inclusives_gf, but the
# filter is applied to the full `gf` — confirm this is intended.
top_ten_exclusives_gf = gf.filter(query, squash=True)
print(top_ten_exclusives_gf.tree(metric_column='time'))

    __          __       __         __ 
   / /_  ____ _/ /______/ /_  ___  / /_
  / __ \/ __ `/ __/ ___/ __ \/ _ \/ __/
 / / / / /_/ / /_/ /__/ / / /  __/ /_  
/_/ /_/\__,_/\__/\___/_/ /_/\___/\__/  v1.2.0

[38;5;34m1396259.000[0m [48;5;246m[38;5;232m.TAU application[0m
└─ [38;5;22m0.000[0m [48;5;246m[38;5;232m[CONTEXT] .TAU application[0m
   └─ [38;5;22m317313.000[0m [48;5;246m[38;5;232m[SAMPLE] __brk [{} {0}][0m
[38;5;196m7493651.000[0m [48;5;246m[38;5;232mOpenMP_Implicit_Task[0m
└─ [38;5;22m0.000[0m [48;5;246m[38;5;232m[CONTEXT] OpenMP_Implicit_Task[0m
   └─ [38;5;22m351733.000[0m [48;5;246m[38;5;232m[SAMPLE] CalcPress...lulesh.cc} {2025}][0m
[38;5;22m291080.000[0m [48;5;246m[38;5;232mOpenMP_Sync_Region...esh.cc} {2022, 0}][0m
└─ [38;5;22m0.000[0m [48;5;246m[38;5;232m[CONTEXT] OpenMP_S...esh.cc} {2022, 0}][0m
   └─ [38;5;22m325849.000[0m [48;5;246m[38;5;232m[SAMPLE] __kmp_joi...rier(int) [{} {0}][0m
[38;5;22m300433.000[0m [48;5;246m[3