# SDP Data Flow Graph Generation and Analysis

Previously tested with IPython 4.0.0 using Python 2.7 and GraphViz 2.36. If you have trouble, check version compatibility.

Last run with Jupyter Notebook 5.0.0 running Python 3.5.2 and GraphViz 2.38

In [None]:
""" These first few lines below import the IPython definitions and methods that we will use. 
Please Refer to ipython_api.py for the implementation """
import sys

from ipywidgets import interact, interact_manual, fixed, SelectMultiple
from IPython.display import display, FileLink, SVG

sys.path+=['..']
from sdp_par_model.config import PipelineConfig
import sdp_par_model.dataflow.pipeline
from sdp_par_model.dataflow.pipeline import Pipeline, flowsToDot, mergeFlows
from sdp_par_model import evaluate as imp
from sdp_par_model.parameters.definitions import *
%matplotlib inline

# Approximate floating point operation rates, taken from the cost model
Rcluster = 14 * Constants.peta # the cluster as a whole
Rnode = 30 * Constants.tera # one single GPU

# Pipelines that the batch export cells iterate over
pipelines_batch = [
    Pipelines.Ingest,
    Pipelines.ICAL,
    Pipelines.RCAL,
    Pipelines.DPrepA_Image,
    Pipelines.DPrepB,
    Pipelines.DPrepC,
    Pipelines.Fast_Img,
]

def makePipeline(telescope, band, pipeline,
                 crossings='none', annotate='default', doMerges=False, granularity=800, Tobs=6*3600):
    """Build the data flow graph for one telescope / band / pipeline combination.

    Parameters:
      telescope, band, pipeline: passed on to PipelineConfig to select the
        configuration.
      crossings: which set of regions to check edges against for crossings;
        one of 'none', 'island', 'island_time' or 'node'.
      annotate: annotation preset for the graph; one of 'none', 'default'
        or 'granularity'.
      doMerges: passed as `performMerges` when creating the pipeline.
      granularity: value used for the 'Nf_min_gran' parameter adjustment.
      Tobs: value used for the 'Tobs' parameter adjustment (presumably
        seconds, given how the run time is formatted in the graph label).

    Returns the object produced by flowsToDot, or None if the configuration
    is reported invalid (in which case the reason is printed).

    Raises KeyError if `crossings` or `annotate` is not one of the known
    options.
    """

    # Create configuration & calculate telescope parameters.
    # Validate only once and reuse the result for the message.
    cfg = PipelineConfig(telescope=telescope,band=band,pipeline=pipeline)
    validity = cfg.is_valid()
    if not validity[0]:
        print("Configuration invalid:" , validity[1])
        return
    # The first pass is only used to determine the optimal snapshot time and
    # facet number, which are then fed back in as adjustments.
    tp = cfg.calc_tel_params(adjusts={ 'Tobs' : Tobs })
    (tsnap_opt, nfacet_opt) = imp.find_optimal_Tsnap_Nfacet(tp)
    tp = cfg.calc_tel_params(adjusts={ 'Nfacet': nfacet_opt, 'Tsnap': tsnap_opt,
                                            'Nf_min_gran': granularity,
                                            'Tobs' : Tobs })
    pip = Pipeline(tp)

    # Determine run time to expect given our total cluster compute power
    # This can end up larger than Tobs now, as the cluster is sized for
    # the "average" load. This obviously only makes sense for offline
    # processing.
    if pipeline not in Pipelines.realtime:
        Trun = pip.tp.Rflop * pip.tp.Tobs / Rcluster
    else:
        Trun = pip.tp.Tobs

    # Determine crossings to check
    cross_regs = {
        'none': None,
        'island': [pip.eachBeam, pip.islandFreqs],
        'island_time': [pip.eachBeam, pip.snapTime],
        'node': [pip.eachBeam, pip.islandFreqs, pip.snapTime, pip.eachFacet, pip.xyPolar],
    }[crossings]
    cross_names = ''
    if cross_regs is not None:
        cross_names = ', checking [' + ', '.join(r.domain.name for r in cross_regs) + \
            '] for crossing edges'

    # Create pipeline, merging if requested
    root = pip.create_pipeline(performMerges=doMerges)

    # Keyword options forwarded to flowsToDot for the chosen annotation preset
    annotateOpts = {
        'none': {'showRegions':False, 'showRates':False, 'showGranularity':False, 'showDegrees':False},
        'default': {},
        'granularity': {'showRegions':False, 'showDegrees':False, 'showRates':False, 'showTaskRates':True }
    }[annotate]

    # Generate graph. The label shows the run time as Trun / 60 and Trun % 60.
    dot = flowsToDot(
        root, Trun, Rnode,
        graph_attr={'size':'10,100', 'penwidth':'2',
                    'label':"%s for band %s in %d:%02d%s" % (pipeline, band, Trun / 60, Trun % 60, cross_names)},
        node_attr={'shape':'rounded', 'style':'filled,rounded','color':'lightgrey'},
        edge_attr={'penwidth':'2'},
        cross_regs=cross_regs,
        **annotateOpts
        )
    return dot

## Graphs can be requested interactively

Select the appropriate telescope configuration. Explanation of the other options:

 * granularity - shifts the frequency splits of certain tasks that otherwise tend to have too coarse a granularity (i.e. produce overly large results).
 * annotate - how much information to show on nodes and edges
 * crossings - whether to analyse edges for crossing between the given entities
 * merges - group related products to simplify the data flow graph

In [None]:
from ipywidgets import ToggleButtons
def showPipeline(telescope, band, pipeline, *args, **kwargs):
    """Display the data flow graph for the given configuration inline as SVG.

    All arguments are forwarded unchanged to makePipeline. When makePipeline
    returns None (which it does after printing why a configuration is
    invalid), nothing is displayed instead of failing with an AttributeError.
    """
    dot = makePipeline(telescope, band, pipeline, *args, **kwargs)
    if dot is None:
        return
    display(SVG(dot._repr_svg_()))
def toggles(opts, *args):
    """Create a ToggleButtons widget offering `opts` as its options.

    Any extra positional arguments are passed through to ToggleButtons.
    """
    return ToggleButtons(*args, options=opts)
# Choices offered by the interactive controls
available_tels = [Telescopes.SKA1_Low, Telescopes.SKA1_Mid]
available_bands = sorted(list(Bands.low_bands) + list(Bands.mid_bands))
available_pips = Pipelines.pure_pipelines
available_crossings = ['none', 'island', 'island_time', 'node']

# One control per showPipeline argument; the graph is only generated
# when the user explicitly requests it.
interact_manual(
    showPipeline,
    telescope=toggles(available_tels),
    band=toggles(available_bands),
    pipeline=toggles(available_pips),
    crossings=toggles(available_crossings),
    doMerges=toggles([False, True]),
    annotate=toggles(['default', 'none', 'granularity']),
    granularity=(20,1580),
    Tobs=(3600, 6*3600),
);

### Reading Guide

| Label | Example Value | Explanation |
| ------------- | ------ | ----------- |
| Grid          |        | | 
| Major Loop:   | 10 x 1 | Runs once per major loop (10x)|
| Facet:        | 16 x 1 | Runs once per facet (16x)|
| Polarisation: | 4 x 1  | Runs once per polarisation (4x)|
| Time: | 444 x 48.5712 s | Runs once per 48.5 s of data (444x) |
| Frequency: | 40681 x 48.3103 ch | Runs once per 48.3 channels of input data (40681x) |
| Baseline: | 12 x 11871.1 | Runs once per baseline bin (12x), each containing at most 11871.1 baselines |
| Tasks: | 3792528 1/s | Task rate (assuming even task distribution over time) |
| Time: | 9.1e-06 s/task | Run time for individual task (assuming no aggregation and given hardware speed) |
| FLOPs: | 0.61 POP/s | Total floating point operation rate contribution (assuming even task distribution over time) | 
| Output: | 124971.46 TB/s | Total data output (assuming even task distribution over time) |

## Graphs can be (batch)-exported

In [None]:
# Export the data flow graph of every batch pipeline, once per crossing analysis
for telescope, band in [(Telescopes.SKA1_Mid, Bands.Mid1)]:
    for pipeline in pipelines_batch:
        for crossings in ['none', 'island']: # , 'island_time', 'node'
            dot = makePipeline(telescope, band, pipeline, crossings, doMerges=False, annotate='default')
            if dot is None:
                # makePipeline has already printed why the configuration is
                # invalid; skip it so the remaining graphs still get exported
                continue
            dot.format='pdf' # png, svg, ...
            display(FileLink(dot.render('out/graphs/%s_%s_%s_dataflow' % (pipeline, band, crossings))))

In [None]:
# Export the merged, granularity-annotated graph of every batch pipeline
for telescope, band in [(Telescopes.SKA1_Mid, Bands.Mid1)]:
    for pipeline in pipelines_batch:
        dot = makePipeline(telescope, band, pipeline, annotate='granularity', doMerges=True)
        if dot is None:
            # makePipeline has already printed why the configuration is
            # invalid; skip it so the remaining graphs still get exported
            continue
        dot.format='pdf' # png, svg, ...
        display(FileLink(dot.render('out/graphs/%s_%s_granularity' % (pipeline, band))))

## We can also look at data flow properties symbolically

This is especially useful because they should match the product formulas pretty much exactly. When a test case complains about a mismatch, this will be useful to track down differences.

In [None]:
from IPython.display import display, Math, Latex, HTML
from sympy import latex, Lambda, Symbol, simplify

# Get symbolised parameters
cfg = PipelineConfig(telescope=Telescopes.SKA1_Mid,band=Bands.Mid1,pipeline=Pipelines.ICAL)
tp = cfg.calc_tel_params(symbolify='product')
pip = Pipeline(tp)
root = pip.create_pipeline(performMerges=True)

# Symbols standing in for the run time and the per-node compute rate.
# The node rate deliberately gets its own name: rebinding the notebook-wide
# numeric `Rnode` to a symbol would break makePipeline when a graph cell
# is re-run after this one.
Trun = Symbol("T_run")
Rnode_sym = Symbol("R_node")
for flow in sorted(root.recursiveDeps(), key=lambda f: f.name):
    print(flow.name)
    # Evaluate count and costs once per flow and reuse them below
    count = flow.count()
    compute = flow.cost('compute')
    transfer = flow.cost('transfer')
    Ttask = simplify(compute/count/Rnode_sym) # Time a single task runs
    Mtask = simplify(transfer/count) # Data a single task produces
    Rtask = simplify(Mtask / Ttask) # Virtual output rate of a single task
    display(Math(
        r"\begin{align} n &= %s \\ %s &= %s \\ %s &= %s \\ %s &= %s \\ %s &= %s \\ %s &= %s \end{align}" %
        (latex(count),
         latex(Symbol('T_{task}')), latex(Ttask),
         latex(Symbol('M_{task}')), latex(Mtask),
         latex(Symbol('R_{task}')), latex(Rtask),
         latex(Symbol('R_{flop}')),
         latex(compute/Trun),
         latex(Symbol('R_{transfer}')),
         latex(transfer/Trun))))
