In [1]:
import os
import sys
sys.path.append(os.path.join('..', '..'))

import datetime
from bokeh.io import show, output_notebook
from bokeh.plotting import figure
from bokeh.models import Plot, Range1d, MultiLine, Circle, HoverTool, TapTool, BoxSelectTool, ColumnDataSource, LabelSet, PointDrawTool, WheelZoomTool
from bokeh.models.graphs import from_networkx, NodesAndLinkedEdges, EdgesAndLinkedNodes
from bokeh.palettes import Spectral4, Spectral3, Category20
import networkx as nx

from mikesnowflake.analysis.snowFlakeAnalysis import SnowFlakeAnalysis

# Configure Bokeh so that subsequent show() calls render inline in this notebook.
output_notebook()

In [2]:
# Snowflake login.
# Credentials are read from the SNOWFLAKE_USER / SNOWFLAKE_PASSWORD environment
# variables so that real values never have to be typed into (and saved with) the
# notebook. The '####' placeholders are kept only as the fallback used when the
# variables are not set.
user = os.environ.get('SNOWFLAKE_USER', '####')
password = os.environ.get('SNOWFLAKE_PASSWORD', '####')

In [3]:
# Date range handed to SnowFlakeAnalysis together with the Snowflake login.
START_DATE = datetime.datetime(year=2018, month=12, day=1)
END_DATE = datetime.datetime(year=2020, month=1, day=16)
sfa = SnowFlakeAnalysis(START_DATE, END_DATE, user, password)

initializing snowflake analysis
obtained snowflake tables and views
excluding SNOWFLAKE_PROD_ETL user from select statements.
obtained gcs table and view names
obtaining hit breakdown
getting repo status
getting yaml info
getting job dependency directed graph of dependent table names
created view directed graph of dependent table names
created rollup directed graph of dependent table names
calculating tablename dependency degrees
getting cronjob count by table
init complete


In [4]:
# Merge the job, view and rollup dependency graphs into a single graph G.
# nx.compose works on two graphs at a time: view + job first (A), then A + rollup.
JG, VG, RG = sfa.jobGraph, sfa.viewGraph, sfa.rollupGraph
A = nx.compose(VG, JG)
G = nx.compose(A, RG)

In [5]:
def getPlot(H, searchStr=None, seed=None):
    """Build an interactive Bokeh figure of a networkx graph.

    Nodes are positioned with ``nx.spring_layout``, drawn as circles and labelled
    with their node names. When ``searchStr`` is given, every node whose name
    contains it (compared case-insensitively) is drawn in red and the plot title
    reports how many nodes matched.

    Args:
        H: networkx graph to draw. The graph passed in is not modified; the
            per-node 'color' attribute is set on a copy.
        searchStr (str, optional): substring to look for in the node names.
            ``None`` or an empty string disables highlighting and leaves the
            title empty.
        seed (optional): forwarded to ``nx.spring_layout`` as its ``seed``
            argument so the layout can be reproduced between runs. When ``None``
            (the default) no seed is passed and the layout is random, as before.

    Returns:
        The Bokeh figure; pass it to ``show()`` to render it.
    """
    defaultColor = Category20[3][1]
    highlightColor = 'red'

    # Collect the matching nodes in a set so the membership test below is O(1).
    matches = set()
    title = ''
    if searchStr:
        needle = searchStr.lower()
        # Lower-case both sides so the search is case-insensitive regardless of
        # how the search string or the node name is capitalised; str() keeps
        # this safe for non-string node keys.
        matches = {n for n in H.nodes() if needle in str(n).lower()}
        title = "search='%s' returned %s results (highlighted in red)" % (searchStr, len(matches))

    # Work on a copy so the caller's graph does not silently gain a 'color'
    # attribute on every node.
    H = H.copy()
    colorMap = {n: {'color': highlightColor if n in matches else defaultColor} for n in H.nodes()}
    nx.set_node_attributes(H, colorMap)

    plot = figure(title=title, x_range=(-1.1,1.1), y_range=(-1.1,1.1), height=1500, width=2000, tools="pan,box_zoom,reset")
    plot.title.align = 'center'

    # Extra keyword arguments of from_networkx are handed to the layout function.
    # The seed is only included when requested so the default call is unchanged.
    layoutArgs = {'scale': 1, 'center': (0, 0)}
    if seed is not None:
        layoutArgs['seed'] = seed
    graph_renderer = from_networkx(H, nx.spring_layout, **layoutArgs)

    # Node appearance: 'color' refers to the node attribute set above.
    graph_renderer.node_renderer.glyph = Circle(size=15, fill_color='color')
    graph_renderer.node_renderer.selection_glyph = Circle(size=15, fill_color=Spectral4[2])
    graph_renderer.node_renderer.hover_glyph = Circle(size=15, fill_color=Spectral4[1])

    # Edge appearance: light grey by default, thicker and coloured on select/hover.
    graph_renderer.edge_renderer.glyph = MultiLine(line_color="#CCCCCC", line_alpha=1, line_width=1)
    graph_renderer.edge_renderer.selection_glyph = MultiLine(line_color=Spectral4[2], line_width=3)
    graph_renderer.edge_renderer.hover_glyph = MultiLine(line_color=Spectral4[1], line_width=3)

    # Selecting a node also selects its edges; hovering an edge also
    # highlights the nodes it connects.
    graph_renderer.selection_policy = NodesAndLinkedEdges()
    graph_renderer.inspection_policy = EdgesAndLinkedNodes()

    wheel_zoom = WheelZoomTool()
    plot.add_tools(HoverTool(tooltips=None), TapTool(), wheel_zoom)
    plot.toolbar.active_scroll = wheel_zoom
    plot.renderers.append(graph_renderer)

    # Put a text label with the node name at each node's layout position.
    pos = graph_renderer.layout_provider.graph_layout
    if pos:
        x, y = zip(*pos.values())
    else:
        # zip(*[]) yields nothing to unpack, so an empty graph needs explicit
        # empty coordinate columns.
        x, y = (), ()
    source = ColumnDataSource({'x': x, 'y': y, 'label': list(pos.keys())})
    labels = LabelSet(x='x', y='y', text='label', source=source, text_font_size='11px')
    plot.renderers.append(labels)
    return plot

In [6]:
# Draw the combined dependency graph, highlighting nodes that match 'publisher'.
p = getPlot(G, 'publisher')
show(p)