In [None]:
from IPython.display import HTML

import pandas as pd
import xarray as xr

import matplotlib.pyplot as plt
%matplotlib inline

### This package gathers profiling information from pyinstrument, line_profiler, and memory_profiler and returns a pandas dataframe or prints the information directly.

In [None]:
import profiletk

In [None]:
# Single ProfileTK instance; every profiling cell below calls methods on it.
ptk = profiletk.ProfileTK()

### Create a list of the file names that this example profiles.

In [None]:
# Bare file names to profile. The profiling cells prepend 'data/' to each name
# when opening it and use the name (plus a suffix) as the timing key.
fns = [
    'wrfout_d01_1979-01-01-01_00:00:00_2D.nc',
    'wrfout_d01_1979-01-01-01_00:00:00_3D.nc',
    'mpas_output.nc',
    'b40.20th.track1.1deg.006.cam2.h0.TS.185001-185012.nc',
    'b40.20th.track1.1deg.006.cam2.h0.T.185001-185012.nc',
    'b.e21.BWmaHIST.f19_g17.PMIP4-past1000.001.cice.h.0850-01.nc',
    'b.e21.BWmaHIST.f19_g17.PMIP4-past1000.001.clm2.h0.0850-01.nc',
    'b.e21.BWmaHIST.f19_g17.PMIP4-past1000.001.pop.h.0850-01.nc',
    'b.e21.BWmaHIST.f19_g17.PMIP4-past1000.001.cam.h0.0850-01.nc'
]

### Fill the pandas dataframe with timing information from calling xr.open_dataset for different files and different arguments.

In [None]:
# Time xr.open_dataset on every file under two argument sets. All '.defaults'
# runs are collected first, then all '.preferred' runs (decoding switched off),
# so the rows are recorded in the same order as two back-to-back loops.
open_variants = [
    ('.defaults', {}),
    ('.preferred', {'decode_times': False, 'mask_and_scale': False, 'decode_cf': False}),
]

for suffix, open_kwargs in open_variants:
    for fn in fns:
        key = fn + suffix
        ptk.collect_functional_times(
            key=key,
            func=xr.open_dataset,
            filename_or_obj='data/' + fn,
            **open_kwargs
        )
    

### Print the dataframe that was just created

In [None]:
# Display the timings DataFrame accumulated on the ProfileTK instance.
ptk.timingsDF

### Get a list of keys within the dataframe

In [None]:
# Widen pandas' display limits so the long file-name keys are not truncated,
# then show the 'key' column.
pd.set_option('display.max_colwidth', 10000)
pd.set_option('display.width', 1000)
ptk.timingsDF['key']

### Create a line plot that looks at the performance of different functions across the different keys

In [None]:
# Line plot of three function timings per key, ordered by total open_dataset time.
(
    ptk.timingsDF
    .sort_values('open_dataset')
    .plot(
        x="key",
        y=['open_dataset', 'decode_cf_variable', 'as_variable'],
        alpha=0.50,
        rot=90,
    )
)

### Create a horizontal bar plot that plots the time to run a function across all keys

In [None]:
import seaborn as sns

# Horizontal bar plot of the 'as_variable' timing for every key, sorted so the
# cheapest keys come first. sns.factorplot was renamed to sns.catplot in
# seaborn 0.9 and has since been removed; catplot accepts the same arguments.
sns.catplot(
    y='key',
    x='as_variable',
    kind='bar',
    data=ptk.timingsDF.sort_values('as_variable'),
)

### Print the full timeline profile generated by pyinstrument

In [None]:
# Look up the profile stored under one key (file name + '.preferred') and print it.
print(ptk.profiles['mpas_output.nc.preferred'])

### Populate the pandas dataframe from a function that the user creates

In [None]:
def open_read(fn):
    """Open a dataset and read every variable's data, then close the file.

    Parameters
    ----------
    fn : str
        Path handed to ``xr.open_dataset``.

    Returns
    -------
    None
        The function exists only to be profiled; nothing is returned.
    """
    # The context manager closes the underlying file even if a read fails.
    with xr.open_dataset(fn) as ds:
        # Iterating ``ds.variables`` directly yields only the names, which
        # reads nothing; go through the Variable objects and touch ``.values``
        # so the data is actually loaded.
        for var in ds.variables.values():
            var.values
# Time the user-defined open_read function on every file.
# NOTE(review): these '<file>.defaults' keys repeat the keys already used for
# the plain xr.open_dataset timings — confirm how ProfileTK treats repeated keys.
for fn in fns:
    key = fn + '.defaults'
    ptk.collect_functional_times(
        key=key,
        func=open_read,
        fn='data/' + fn,
    )

### Print the functions in descending order, starting with the most expensive. Also prints the depth at which each function is found within the call tree.

In [None]:
# Hotspots for the profile stored under the 'mpas_output.nc.defaults' key.
# l=25 presumably limits how many entries are printed — TODO confirm against profiletk.
ptk.print_timer_hotspots('mpas_output.nc.defaults', l=25)

### Provides a print out of the line by line timings for the selected functions

In [None]:
# Line-by-line timings for xr.open_dataset (decoding disabled) on the 2D WRF
# file, with one extra xarray function listed in o_funcs.
ptk.collect_linebyline_times(
    func=xr.open_dataset,
    o_funcs=[xr.backends.api._protect_dataset_variables_inplace],
    filename_or_obj='data/wrfout_d01_1979-01-01-01_00:00:00_2D.nc',
    decode_times=False,
    mask_and_scale=False,
    decode_cf=False,
)

### Provides a print out of the line by line memory usage for the selected functions

In [None]:
# Line-by-line memory usage for xr.open_dataset (decoding disabled) on the 2D
# WRF file, with two extra xarray functions listed in o_funcs.
ptk.collect_linebyline_memory_usage(
    func=xr.open_dataset,
    o_funcs=[
        xr.conventions.decode_cf_variable,
        xr.core.variable.as_variable,
    ],
    filename_or_obj='data/wrfout_d01_1979-01-01-01_00:00:00_2D.nc',
    decode_times=False,
    mask_and_scale=False,
    decode_cf=False,
)

### Print the high-water mark of memory usage for the selected function

In [None]:
# Memory usage of xr.open_dataset (decoding disabled) on the 2D WRF file.
ptk.collect_memory_usage(
    func=xr.open_dataset,
    filename_or_obj='data/wrfout_d01_1979-01-01-01_00:00:00_2D.nc',
    decode_times=False,
    mask_and_scale=False,
    decode_cf=False,
)

### Print the memory hotspots

In [None]:
# Memory hotspots of xr.open_dataset (decoding disabled) on the 2D WRF file.
ptk.print_memory_hotspots(
    func=xr.open_dataset,
    l=5,
    filename_or_obj='data/wrfout_d01_1979-01-01-01_00:00:00_2D.nc',
    decode_times=False,
    mask_and_scale=False,
    decode_cf=False,
)

### Create a call graph

In [None]:
from IPython.display import Image

# Ask ProfileTK for a call graph of xr.open_dataset (decoding disabled) using
# the image file name below, then display that image inline.
img_fn = 'call_graph.png'
ptk.show_call_graph(
    func=xr.open_dataset,
    fn=img_fn,
    filename_or_obj='data/wrfout_d01_1979-01-01-01_00:00:00_2D.nc',
    decode_times=False,
    mask_and_scale=False,
    decode_cf=False,
)

Image(filename=img_fn)