# Turn the Jira cards constituting the Data Roadmap into a Network Graph
Run "Data Roadmap 4 Create Unified List of Jira Cards" first to get the latest view
Then run "Data Roadmap 5 Turn into a Graph" to get the appropriate Graph object(s)

In [1]:
import pandas as pd
import json

pd.options.display.max_rows = 999
import numpy as np
import re
import pickle
from atlassian import Jira

import networkx as nx
import plotly.graph_objects as go
from plotly.validators.scatter.marker import SymbolValidator
from plotly.subplots import make_subplots
from pyvis.network import Network 
import pyvis.options as pyvis_options
from datetime import datetime, date

with open(r'C:\Users\steve.waterman\Python\creds\Jira', 'rb') as handle:
    mycon = pickle.load(handle)
exec(f'jira = Jira({mycon})')
from pprint import pprint

import sys
sys.path.append(r'C:\Users\steve.waterman\Python\read_bitbucket')
import lineage_vis as lv
import Useful_Jira as uj

In [2]:
# Load the cards from step 4
rdmp_tickets,rdmp_tickets_succinct,rdmp_df=pickle.load(open(f"3-Jira_DataRoadmapCards.pickle", "rb" ) )

# And load the graph from step 5
node_df,edge_df,G=pickle.load(open(f"3-Jira_DataRoadmapLatest.pickle", "rb" ) )

## Pre processing

In [3]:
# Create a flag to say whether a task was done already by the time that the Audit Plan came together
rdmp_df['PreDelivered']=rdmp_df['completion_date'].apply(lambda x: False if x==None else True if datetime.strptime(x[:10],"%Y-%m-%d")<datetime(2022,6,24) else False)


In [4]:
# Derive dataset(s)
def extract_dataset(title):

    # End to end
    if re.findall('(.*) End-to-end',title):
        return re.findall('(.*) End-to-end',title)[0]
    
    # S3, Stage, Transform, Logic, Presentation
    elif re.findall('(.*) \(Build .*\)',title):
        return re.findall('(.*) \(Build .*\)',title)[0]
    
    # Reconciliation
    elif re.findall('(.*) \(Final counts reconciliation\)',title):
        return re.findall('(.*) \(Final counts reconciliation\)',title)[0]
    
    
    elif re.findall('Merge (Connect equivalent of .*?) into SnowFlake Data Model',title):
        return re.findall('Merge (Connect equivalent of .*?) into SnowFlake Data Model',title)
    
rdmp_df['Dataset']=rdmp_df['title'].apply(extract_dataset)
rdmp_df['step_list']=rdmp_df['labels'].apply(lambda x:[i for i in x if re.match('\d{1,2}_.*',i)])
rdmp_df['Step']=rdmp_df['step_list'].apply(lambda x:x[0] if len(x)>0 else None)


In [5]:
# Classify work types
def classify_work_type(row):
    if row['title'][:7]=='BLOCKER' and row['epic_key']=='DATA-1766':
        return 'Connect Dev'
    elif row['epic_key']=='CN-2581':
        return 'Recreate legacy tables'
    elif str(row['Step'])!=None and row['Strategy Micro Strand'][-6:]=='Domain' and row['Strategy Macro Strand']=='Data Readiness':
        return 'Data Readiness'
#     elif re.findall('Data Readiness',row['epic_title'],re.IGNORECASE):
#         return 'Data Readiness'
    elif row['epic_key']=='CN-2917': # Tactical reports
        return 'Report Migration'
    elif row['epic_key'] in ['CN-2489'] and 'Report_Migration_Ticket' in row['labels']:
        return 'Report Migration'
    else:
        return 'Other Data Tasks'

rdmp_df.loc[rdmp_df['issuetype_name']!='Epic','Category']=rdmp_df.loc[rdmp_df['issuetype_name']!='Epic'].apply(classify_work_type,axis=1)

In [6]:
# Done flag and counter flag
rdmp_df['Total']=1

rdmp_df['Done']=rdmp_df['status_name'].apply(lambda x: 1 if x in ['Dev - Done (Deployed)','Done'] else 0 if x in ['To Do','To do (imminent)'] else 0.5)


In [7]:
#rdmp_df['status_name'].unique()

# Create a tracker (function) for the Data Readiness Stream

In [8]:
def Data_Readiness_Tracker(input_df,title='Overall',save=True):
    
    # Deep copy the input df
    df=input_df.copy()
    #df.groupby(['Strategy Macro Strand','Strategy Micro Strand']).count()
    
    
    # Pivot to create the tracker df
    data_readiness_tracker=df.loc[(df['Dataset'].notna())\
                              &(df['Step'].notna())\
                              &(df['Strategy Micro Strand'].apply(lambda x: x[-6:]=='Domain'))\
                              &(df['Strategy Macro Strand']=='Data Readiness')\
                              ,['status_name','Dataset','Step','Strategy Micro Strand']]\
.pivot(index=['Strategy Micro Strand','Dataset'],columns='Step',values='status_name').reset_index()
    
    #################################################################################
    # Map display colours to cell values
    #################################################################################
    color_lookup={'To Do':'#d50045'
                   ,'To do (imminent)':'#d50045'
                   ,'Paused / Waiting signoff':'#354670'
                   ,'Paused':'#354670'
                   ,'In Progress':'#354670'
                   ,'On hold':'#354670'
                   ,'Under Review':'#354670'
                   ,'Paused / Waiting signoff':'#354670' 
                   ,'Exploration - Done':'#354670'
                   ,'Exploration - Ready for Dev':'#354670'
                   ,'Exploration - Ready for Refinement':'#354670'
                   ,'Exploration - In Progress':'#354670'
                   ,'Dev - Review':'#354670'
                   ,'Dev - In Progress':'#354670'
                   ,'Done':'#009590'
                   ,'No longer relevant':'#005251'
                   ,'Completed':'#009590'
                  ,'Dev - Done (Deployed)':'#009590'}

    colormap=data_readiness_tracker.applymap(lambda x: color_lookup[x] if x in color_lookup.keys() else "white")
    font_colormap=data_readiness_tracker.applymap(lambda x: "white" if x in color_lookup.keys() else "black")


    #################################################################################
    # Plotly figure across all domains
    #################################################################################
    fig = go.Figure(data=[go.Table(
        header=dict(values=list(data_readiness_tracker.columns),
                    fill_color='paleturquoise',
                    align='left'),
        cells=dict(values=[data_readiness_tracker[i] for i in data_readiness_tracker.columns],
                   fill_color=colormap.transpose(),
                    font=dict(color=font_colormap.transpose(), size=11),
                   align='left'))
    ])

    #fig.show()
    fig1=fig # Copy outputs
    if save==True:
        fig1.write_html('{} Overall Data Readiness Progress.html'.format(title))
    
    #################################################################################
# Above for each individual domain only
#################################################################################
    substrands=[]
    for strand in data_readiness_tracker['Strategy Micro Strand'].unique():
        # Subset data
        sub_df=data_readiness_tracker.loc[data_readiness_tracker['Strategy Micro Strand']==strand,].drop(columns=['Strategy Micro Strand'])

        # Color maps
        colormap=sub_df.applymap(lambda x: color_lookup[x] if x in color_lookup.keys() else "white")
        font_colormap=sub_df.applymap(lambda x: "white" if x in color_lookup.keys() else "black")

        #################################################################################
        # Plotly figure
        #################################################################################
        fig = go.Figure(data=[go.Table(
            header=dict(values=list(sub_df.columns),
                        fill_color='#D9D9D9',
                        align='center'),
            cells=dict(values=[sub_df[i] for i in sub_df.columns],
                       fill_color=colormap.transpose(),
                        font=dict(color=font_colormap.transpose(), size=11),
                       align='center'),
        )
        ])

        fig.update_layout({'title':{'text':strand
                                   ,'font_family':"Raleway"
                                   ,'font_color':"black"
                                    ,'x':0.5
                                   },
                              'font_family':"Raleway",
                          })

        #fig.show()
        if save==True:
            fig.write_html('{} Data Readiness Progress- {}.html'.format(title,strand))
        
        substrands.append(fig)
        #fig.to_image('Data Readiness Progress- {}'.format(strand),format='png') 


    data_readiness_tracker=data_readiness_tracker.style.applymap(lambda x: "background-color: {}".format(color_lookup[x]) if x in color_lookup.keys() else "background-color:white")
    return data_readiness_tracker,fig1,substrands
    

# Create a Plotly Dashboard displaying progress

In [9]:
def summarise_task_completion(row):
    if row['Done']==1:
        return 1
    elif row['% complete']:
        return row['% complete']
    else:
        return row['Done']
    
    
def produce_summary_view(input_df,title='Progress Dashboard'
                         ,    top_padding_rows=1 # Specify the number of rows you want to add at the top so that the title is away from the plots 
                        ,footer_padding_rows=1
                        ,spacing_rows=1 # Specify the number of rows to add 
                        ,spacing_row_relative_size=5 # AND/OR specify how big they should be in relative terms
                        ,section_spacing_modifier=10 # Amount to multiple the spacing row by when a whole new section begins
                        ,spacing_col_relative_size=3 # Specify col width relative size
                        ,barchart_row_relative_size=1
                        ,n_cols=2 # !!! Specify number of columns, albeit in practice this is assumed to be 2 elsewhere, so changing this would require some rework!!!!!):
                        ):
    # Copy input
    df=input_df.copy()

    
    # Split subtasks and tasks
    df_subtasks=df.loc[df['issuetype_name'].isin(['Subtask','Sub-task'])]
    df_tasks=df.loc[df['issuetype_name'].isin(['Task','Story'])]
    
    # Group subtasks by parent to get a completion level
    df_subtasks_summary=df_subtasks.groupby(['parent_key','parent_title'])[['Done','Total']].sum().reset_index()
    
    # Calculate a % complete according to subtasks
    df_subtasks_summary['% complete']=df_subtasks_summary['Done']/df_subtasks_summary['Total']

    # rename and filter the columns
    df_subtasks_summary=df_subtasks_summary.loc[:,['parent_key','% complete']].rename(columns={'parent_key':'key'})
    
    
    # join back onto tasks
    df_tasks=df_tasks.merge(df_subtasks_summary,on='key',how='left',suffixes=['',' (subtasks)'])              
    
    # If "done" then 100% else subtask level
    df_tasks['Done']=df_tasks.apply(summarise_task_completion,axis=1)
    
    #################################################
    # Task level summary
    #################################################
        
    # Produce 4 summary views: By Epic Y/N and by whether delivered before 24th June (when Audit planning occurred) Y/N
    vw1=df_tasks.groupby(['Category','epic_title'])[['Done','Total']].sum().reset_index()
    vw1.insert(loc=0,column='Scope',value='Total')
    
    vw2=df_tasks.groupby(['Category'])[['Done','Total']].sum().reset_index()# Summarise overall
    vw2.insert(loc=0,column='Scope',value='Total')
    
    vw3=df_tasks.loc[df_tasks['PreDelivered']==False].groupby(['Category','epic_title'])[['Done','Total']].sum().reset_index()# Summarise by domain (outstanding tasks only)
    vw3.insert(loc=0,column='Scope',value='Outstanding as of 24 June')
    
    vw4=df_tasks.loc[df_tasks['PreDelivered']==False].groupby(['Category'])[['Done','Total']].sum().reset_index()  # Summarise overall (outstanding tasks only)
    vw4.insert(loc=0,column='Scope',value='Outstanding as of 24 June')
    
    task_summary=pd.concat([vw1,vw2,vw3,vw4])
    
    # Calculate overall completion
    task_summary['% Complete']=task_summary['Done']/task_summary['Total']
    
    # Fill in epic title where missing
    task_summary['epic_title'].fillna('Overall',inplace=True)
    
    #display(task_summary)
    
    # Filter to just the interesting delivery tasks-- can expand out the "Other stuff" category if needed
    interesting_stuff=task_summary.loc[(task_summary['Category'].isin(['Data Readiness','Report Migration','Recreate legacy tables']))|\
                                       ((task_summary['Category'].isin(['Other Data Tasks']))&(task_summary['epic_title'].isin(['Overall'])))]
    
    
    #################################################  
    # Create sort keys, then sort by them in order 
    #################################################
    # Create
    cat_sort_order_dict={'Data Readiness':1,'Recreate legacy tables':2,'Report Migration':3,'Other Data Tasks':4}
    
    interesting_stuff['sort_key1']=interesting_stuff['Category'].apply(lambda x: cat_sort_order_dict[x])
    interesting_stuff['sort_key2']=interesting_stuff['epic_title'].apply(lambda x: 0 if x=='Overall' else 1)
    interesting_stuff=interesting_stuff.sort_values(by=['sort_key1','sort_key2','Total'],ascending=[True,True,False])
    interesting_stuff['title_text']=interesting_stuff.apply(lambda x: '{} progress<br>(out of {} tasks)'.format(x['Category'],x['Total']),axis=1)
    
    
    #################################################  
    # Plotly Dashboard relies on "subplots", but you need to tell is what those subplots will be and how many rows they'll take up, so below code tries to figure that out
    #################################################
    
    # Create a "layout" object. This holds a row for each of the graphs
    # Sort key 1 = Category of work, Sort Key 2 is in effect "Overall summary vs EPIC breakdown"
    layout=interesting_stuff.groupby(['sort_key1','sort_key2'])['Total'].count().reset_index()
    
    #####################
    # Layout specifications
    # N.B. Plotly has options for padding etc but they seem to struggle to behave, so instead I'm just adding more whitespace rows and columns to acheive the same effect
    # otherwise the plots overlap
    ####################
    real_num_columns=n_cols*2-1 # Add spacing columns
    
   
    layout['rows_per_plot']=layout['Total'].apply(lambda x:int(x/n_cols)) # The "Total" column is a counter, but because we have two metrics (all time and since 24th June) we need to divide by 2 in a row sum
    # In effect,"Total" specifies the number of rows each visualisation would need. For example a bar chart with 5 data points would require 5 rows. An indicator var would need 1
    
     # For each plot, specify a "spec" item. This is needed later when determining what goes where
    layout['specs']=layout.apply(lambda x: {"type": "indicator", "rowspan": int(x['rows_per_plot'])} if x['sort_key2']==0 \
                                 else {"type": "bar", "rowspan": int(x['rows_per_plot'])},axis=1)
    
    # Calculate the first row for each plot
    layout['row_sum']=layout['rows_per_plot'].cumsum()
    
    # V1 even spacing between all plots
    layout['row_n']=layout['row_sum'].shift(1).fillna(0)+1+top_padding_rows+spacing_rows*layout.index # Calculate the start row

    #layout['row_n']=layout['row_sum'].shift(1).fillna(0)+1+top_padding_rows+(spacing_rows*layout['sort_key2'].apply(lambda x: 2 if x==0 else 1))\
    #                                                                        *layout.index # Calculate the start row
    
    layout['start_row']=layout['row_n']
    
    # Calculate the end row for each plot
    layout['last_row']=layout['row_n']+layout['rows_per_plot']-1 #Calculate the end row
    
    # Calculate the range the plot will take up
    layout['row_range']=layout.apply(lambda row: range(int(row['start_row']),int(row['last_row'])+1),axis=1)
    
    # Add an ID for each plot to retain the info needed
    layout['plot_id']=layout.index    
        
    
    
    # The above "layout" object has a row per plot. However we also need- for the purposes of the "make subplots" plotly function, to specify blanks for:
    # 1) The rows that are part of the range that a plot will take up. E.g. a three row plot = [plot, None, None]
    # 2) The rows that we are using as blank space 
    
    layout2=layout.explode('row_range')
    layout2['row_type']=layout2.apply(lambda x: 'plot start' if x['row_range']==x['start_row'] else 'plot fill',axis=1)
    #display(layout2)
    
    # At this point layout 2 has a record for each row in a plot, but not for each spacing row. Sort that out!
    n_rows=int(layout2['row_range'].max()+footer_padding_rows) # Total number of rows in plot is the bottom plot row + any footer
    
    # Create a record therefore for every single row in the plotly fig, whether it has anything on it or not
    layout2=pd.DataFrame(range(1,n_rows+1),columns=['row_range']).merge(layout2,on='row_range',how='left')
    layout2['row_type'].fillna('spacing row',inplace=True)
    
    
    # Specify the row heights relative to each other, including wider gaps between sections
    layout2['row_below']=layout2['row_type'].shift(-1)
    layout2['next_sortkey2']=layout2['sort_key2'].shift(-1)
    
    def calc_spacing(x):
        if x['row_type']=='spacing row':
            if x['row_below']=='plot start' and x['next_sortkey2']==0:#If an indicator below signalling the beginning of a new section
                return spacing_row_relative_size*section_spacing_modifier
            else:
                return spacing_row_relative_size
        elif x['sort_key2']==1:
                return barchart_row_relative_size
        else:
            return 1
        
    layout2['row_heights']=layout2.apply(calc_spacing,axis=1)
    
    
    
    #############
    # Create the speclist from the layout2 item
    #############
    speclist=[]
    # For each row
    for row in layout2.to_dict(orient='records'):
        # Work out what the plots on that row should look like
        if row['row_type']=='plot start':
            row_item_spec=row['specs']
        else:
            row_item_spec=None          
            
        # Repeat for each column, skipping filler columns
        full_row_spec=[row_item_spec if n%2!=0 else None for n in range(1,real_num_columns+1)]
        speclist.append(full_row_spec)
    #pprint(speclist)
    
    ##########################################
    # Initialise the Plotly figure
    ##########################################
    #pprint(speclist)
    fig = make_subplots(rows=n_rows, cols=real_num_columns
                        ,specs=speclist
                       
                       ,row_heights=list(layout2['row_heights'])
                    ,column_widths=[1 if n%2!=0 else spacing_col_relative_size for n in range(1,real_num_columns+1)]
                       ,column_titles=['Overall Progress',None,'Progress (tasks not yet completed before 24th June)']
                       )
    
    ##########################################
    # Add the 4 plot types
    ##########################################
    for plot_row in layout.to_dict(orient='records'):
        
        # Df holding overall progress
        left_df=interesting_stuff.loc[(interesting_stuff['sort_key1']==plot_row['sort_key1'])\
                                      &(interesting_stuff['sort_key2']==plot_row['sort_key2'])\
                                    &(interesting_stuff['Scope']=='Total')].sort_values(by='% Complete')
        
        # Df holding progress since 24th June
        right_df=interesting_stuff.loc[(interesting_stuff['sort_key1']==plot_row['sort_key1'])\
                                      &(interesting_stuff['sort_key2']==plot_row['sort_key2'])\
                                    &(interesting_stuff['Scope']=='Outstanding as of 24 June')].sort_values(by='% Complete')
        
        ####################################
        # Indicator plots for overall progress
        ####################################
        if plot_row['sort_key2']==0:# If an indicator row
            
            # Add left hand plot
            fig.add_trace(go.Indicator(
                    value = left_df.loc[:,'% Complete'].item(),
                    delta = {'reference': 0},
                    mode = "number",
                    gauge = {
                        'axis': {'visible': False}}
                    ,title={'text':left_df['title_text'].item(),'font':{'size':22,'family':'Raleway', 'color':'#a20034'}}
                    ,number={"font":{"size":28, 'family':'Raleway', 'color':'#d50045'},'valueformat':',.0%'}
                    #,domain = {'row': 1, 'column': 1}
                ),row=int(plot_row['start_row']),col=1)
    
            # Add right hand plot
            fig.add_trace(go.Indicator(
                    value = right_df.loc[:,'% Complete'].item(),
                    delta = {'reference': 0},
                    mode = "number",
                    gauge = {
                        'axis': {'visible': True}}
                    ,title={'text':right_df['title_text'].item(),'font':{'size':22,'family':'Raleway', 'color':'#a20034'}}
                    ,number={"font":{"size":28, 'family':'Raleway', 'color':'#d50045'},'valueformat':',.0%'}
                    #,domain = {'row': 1, 'column': 1}
                ),row=int(plot_row['start_row']),col=3)
        
        ####################################
        # Bar charts plots for itemised progress
        ####################################
        elif plot_row['sort_key2']==1:# If an barchart row, by epic
            #display(left_df)
            fig.add_trace(go.Bar(
        x=left_df['% Complete'],
        y=left_df['epic_title'].apply(lambda x: x.split(':')[-1]),
                orientation='h'      
        ),row=int(plot_row['start_row']),col=1)
            
            fig.add_trace(go.Bar(
        x=right_df['% Complete'],
        y=right_df['epic_title'].apply(lambda x: x.split(':')[-1]),
                orientation='h'
                
        ),row=int(plot_row['start_row']),col=3)
            
    for x in [i for i in fig.layout if i[:5]=='xaxis']:
        #print(x)
        fig.update_layout({x:{'tickformat':'0%','range':[0,1]}})
    
    for y in [i for i in fig.layout if i[:5]=='yaxis']:
        #print(x)
        fig.update_layout({y:{'tickfont':dict(size=12, family='Raleway', color='#242e49')
                             ,'dtick':1}})
    
    # Hide legend
    fig.update_layout(showlegend=False)
    
    # Add Title
    fig.update_layout(
        title={
            'text': title,
            'y':1,
            'x':0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font':dict(size=40, family='Raleway', color='#813d63')
        })


    fig.write_html(f'{title}.html')
    return fig,interesting_stuff


# Produce summary views for different outputs

In [10]:
# Overall 
dashboard_rdmp=produce_summary_view(rdmp_df,'Overall Data Roadmap Progress',section_spacing_modifier=50,barchart_row_relative_size=30)
data_readiness_tracker,overall_DR_fig,substrands=Data_Readiness_Tracker(rdmp_df,'Overall')


def summarise_progress(target_jira_list,desc):
    crit_path=lv.get_lineage_of_node_list(target_jira_list,G,lineage_type='ancestor')
    crit_path_df=rdmp_df.loc[rdmp_df['key'].isin(crit_path)]
    
    dashboard_rdmp=produce_summary_view(crit_path_df,desc,section_spacing_modifier=50,barchart_row_relative_size=20)
    data_readiness_tracker,overall_DR_fig,substrands=Data_Readiness_Tracker(crit_path_df,desc)
    return data_readiness_tracker,overall_DR_fig,substrands,*dashboard_rdmp

audit_launch=summarise_progress(['DATA-1772'],'Audit Launch')
audit_fast_follow=summarise_progress(['DATA-1773'],'Audit Fast Follow')
mytracker=summarise_progress(['CN-2774'],'My Tracker Report')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interesting_stuff['sort_key1']=interesting_stuff['Category'].apply(lambda x: cat_sort_order_dict[x])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interesting_stuff['sort_key2']=interesting_stuff['epic_title'].apply(lambda x: 0 if x=='Overall' else 1)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [11]:
display(mytracker[0])

Step,Strategy Micro Strand,Dataset,1_S3,2_Stage,3_Transform,4_Logic,5_Presentation,6_Validation,99_Overall
0,audit Domain,audit,Done,Done,Done,Done,Done,Done,On hold
1,audit Domain,audit_comments,Done,Done,Done,Done,Done,Done,On hold
2,audit Domain,audit_finding,Done,Done,Done,Done,Done,In Progress,In Progress
3,audit Domain,audit_report_template,Done,Done,Done,Done,Done,Done,Done
4,audit Domain,carry_forward_and_follow_up_history,Done,Done,Done,Done,Done,Done,Done
5,audit Domain,corrective_action,Done,Done,Done,Done,Done,To Do,On hold
6,ethnet Domain,node_address,Done,Done,Done,Done,Done,Done,Done
7,ethnet Domain,node_auditorganisationext,Done,Done,Done,Done,Done,Done,Done
8,ethnet Domain,node_org,Done,Done,Done,Done,Done,Done,Done
9,ethnet Domain,node_site,Done,Done,Done,Done,Done,Done,In Progress


In [12]:
audit_launch[0]

Step,Strategy Micro Strand,Dataset,1_S3,2_Stage,3_Transform,4_Logic,5_Presentation,6_Validation,99_Overall
0,audit Domain,audit,Done,Done,Done,Done,Done,Done,On hold
1,audit Domain,audit_comments,Done,Done,Done,Done,Done,Done,On hold
2,audit Domain,audit_finding,Done,Done,Done,Done,Done,In Progress,In Progress
3,audit Domain,audit_report_template,Done,Done,Done,Done,Done,Done,Done
4,audit Domain,carry_forward_and_follow_up_history,Done,Done,Done,Done,Done,Done,Done
5,audit Domain,corrective_action,Done,Done,Done,Done,Done,To Do,On hold
6,audit Domain,org_issue_title_criticalities,Done,Done,Done,To Do,To Do,To Do,In Progress
7,ethnet Domain,node_address,Done,Done,Done,Done,Done,Done,Done
8,ethnet Domain,node_auditorganisationext,Done,Done,Done,Done,Done,Done,Done
9,ethnet Domain,node_org,Done,Done,Done,Done,Done,Done,Done


In [13]:
# ## As EXCEL Dfs

# with pd.ExcelWriter('Data Readiness.xlsx') as writer:


#     for strand in data_readiness_tracker['Strategy Micro Strand'].unique():
#         print(strand)
#         display(data_readiness_tracker.loc[data_readiness_tracker['Strategy Micro Strand']==strand,].style.applymap(lambda x: "background-color: #D50045" if x=='To Do' else\
#                                                                                                                     "background-color: green" if x=='Done' else "background-color: #354570" if x=='In Progress' else "background-color: white"))

#         data_readiness_tracker.loc[data_readiness_tracker['Strategy Micro Strand']==strand,].style.applymap(lambda x: "background-color: #D50045" if x=='To Do' else\
#                                                                                                                 "background-color: green" if x=='Done' else "background-color: #354570" if x=='In Progress' else "background-color: white").to_excel(writer,sheet_name=strand.replace(':',''))



# Visualise plan

In [14]:
######################################################################
# Calc xpos
######################################################################
node_df=lv.vis3_calc_xpos(G, node_df,showtiming=True)
######################################################################
# Calc ypos
######################################################################
node_df=lv.vis4_calc_ypos(G, node_df,showtiming=False,print_update_every=500)

######################################################################
# Standardise
######################################################################
node_df=lv.vis5_standardise_pos(node_df)

Shifting of successor xpositions took: 0.0042111873626708984
And generating xpositions for unconnected nodes took: 0.3471050262451172
After setting xpos, node_df is 2027 long
After calculating ypos band positions, nodeDf is 2027 long
After calculating yband heights, nodeDf is 2027 long
After ypos initialisation, nodeDf is 2027 long
nodeDf is 2027 long, and we are searching over 2027
Generating y position for node 0 of 2027
Generating y position for node 500 of 2027
Generating y position for node 1000 of 2027
Generating y position for node 1500 of 2027
Generating y position for node 2000 of 2027


In [15]:
######################################################################
# Plot the visual itself
######################################################################
lv.plot_pyvis(G, node_df, edge_df,graph_title=f'Data Roadmap',canvasSize=(1800,1200),pyvis_size_scale=10
               ,use_images=False
               ,pyvis_x_scale=1200 # How much to stretch it out
               ,pyvis_y_scale=100
               ,show_graph=True
               ,use_physics=False
               ,savePath=r'C:\Users\steve.waterman\OneDrive - Sedex'
              #,buttonlist=['interaction']
              ,new_opts_as_a_string='''{
  "interaction": {
    "navigationButtons": true
  }
}'''
                ,network_kwargs={'heading':'Data Roadmap','font_color':'white'})


Got current options string
Turned it into a dict
Turned new options into a dict too
Updated my options
Turned it back into a JSON string
And updated the visualisation successfully


<class 'pyvis.network.Network'> |N|=2028 |E|=5,779

# Graph for my tracker report

In [16]:
#############################################################################################################
# Subset nodes
#############################################################################################################
# Flag the items to keep
keep_nodes=crit_path=lv.get_lineage_of_node_list(['CN-2774'],G,lineage_type='ancestor')


# Actually subset
sub_G, sub_nodedf, sub_edgedf=lv.subset_graph(G,node_df,edge_df,keep_nodes=keep_nodes)


#############################################################################################################
# Update appropriate fields
#############################################################################################################

# Change shape of interdependencies
# Recalc Y POS
sub_nodedf=lv.vis4_calc_ypos(sub_G, sub_nodedf,showtiming=False,print_update_every=500)
sub_edgedf['label']==None

lv.plot_pyvis(sub_G, sub_nodedf, sub_edgedf,graph_title=f'My Tracker Report Release',canvasSize=(1800,1200),pyvis_size_scale=10
           ,use_images=False
           ,pyvis_x_scale=1200 # How much to stretch it out
           ,pyvis_y_scale=100
           ,show_graph=True
           ,use_physics=False
              ,savePath=r'C:\Users\steve.waterman\OneDrive - Sedex\Data Strategy\Substrand Visualisations'
#                  ,new_opts_as_a_string='''{
# "layout": {
# "hierarchical": {
#   "enabled": true,
#   "levelSeparation": 325,
#   "nodeSpacing": 205,
#   "treeSpacing": 255,
#   "direction": "LR",
#   "sortMethod": "directed"
# }
# },
# "physics": {
# "hierarchicalRepulsion": {
#   "centralGravity": 0
# },
# "minVelocity": 0.75,
# "solver": "hierarchicalRepulsion"
# }
# }
# '''
            ,network_kwargs={'heading':f'My Tracker Report Release','font_color':'white'})


After calculating ypos band positions, nodeDf is 482 long
After calculating yband heights, nodeDf is 482 long
After ypos initialisation, nodeDf is 482 long
nodeDf is 482 long, and we are searching over 482
Generating y position for node 0 of 482


<class 'pyvis.network.Network'> |N|=483 |E|=1,175

# Graph for each substrand only

In [16]:
strategies=node_df['Strategy Micro Strand'].unique()
strategies=[ 'user-management Domain',
       'Permissions', 'Data-Lineage',
       'Discovery & Profiling', 'Data-Alerting-&-Diagnostics',
       'ethnet Domain', 'RefData Domain', 'slcp-assessments Domain',
       'saq Domain', 'audit Domain',
       'payments Domain', 'SalesForce Domain',
        'Data-Unit-Tests & Deployment',
       'GDPR+', 'Self-serve']

In [18]:

strategies=['Self-serve']
for substrand in strategies:

    #############################################################################################################
    # Subset nodes
    #############################################################################################################
    # Flag the items to keep
    keep_nodes=list(node_df.loc[node_df['Strategy Micro Strand']==substrand,'name'])
    
    # Get the tickets that are not part of the substrand, but that the substrand is dependent on, or blocking of
    # interdependencies=list(edge_df.loc[(edge_df['From'].isin(keep_nodes))&~(edge_df['To'].isin(keep_nodes)),'To'])+\
    #      list(edge_df.loc[~(edge_df['From'].isin(keep_nodes))&(edge_df['To'].isin(keep_nodes)),'From'])

    # Get only the items that are not part of the substrand, but that this substrand depends on.
    interdependencies=list(edge_df.loc[~(edge_df['From'].isin(keep_nodes))&(edge_df['To'].isin(keep_nodes)),'From'])
    keep_nodes=list(set(keep_nodes+interdependencies))
    
    # Actually subset
    sub_G, sub_nodedf, sub_edgedf=lv.subset_graph(G,node_df,edge_df,keep_nodes=keep_nodes)

    #############################################################################################################
    # Define macro strategy
    #############################################################################################################
    macro_strat=list(node_df.loc[node_df['Strategy Micro Strand']==substrand,'Strategy Macro Strand'].unique())[0]
    
   #############################################################################################################
    # Update appropriate fields
    #############################################################################################################

    # Change shape of interdependencies
    sub_nodedf.loc[sub_nodedf['name'].isin(interdependencies),'shape']='circle'
    
    display(sub_nodedf)
    # Recalc Y POS
    sub_nodedf=lv.vis4_calc_ypos(sub_G, sub_nodedf,showtiming=False,print_update_every=500)
    sub_edgedf['label']==None

    lv.plot_pyvis(sub_G, sub_nodedf, sub_edgedf,graph_title=f'{macro_strat}- {substrand}',canvasSize=(1800,1200),pyvis_size_scale=10
               ,use_images=False
               ,pyvis_x_scale=1200 # How much to stretch it out
               ,pyvis_y_scale=100
               ,show_graph=True
               ,use_physics=True
                  ,savePath=r'C:\Users\steve.waterman\OneDrive - Sedex\Data Strategy\Substrand Visualisations'
                     ,new_opts_as_a_string='''{
  "layout": {
    "hierarchical": {
      "enabled": true,
      "levelSeparation": 325,
      "nodeSpacing": 205,
      "treeSpacing": 255,
      "direction": "LR",
      "sortMethod": "directed"
    }
  },
  "physics": {
    "hierarchicalRepulsion": {
      "centralGravity": 0
    },
    "minVelocity": 0.75,
    "solver": "hierarchicalRepulsion"
  }
}
'''
                ,network_kwargs={'heading':f'{macro_strat}- {substrand}','font_color':'white'})
    

Unnamed: 0,id,key,title,assignee_name,status_name,status_id,issuetype_name,issuetype_id,project_id,project_key,...,y_band_height_with_spacing,y_band_macro_order,y_band_boundary,y_band_From,y_band_To,y_band_Range,y_band_midpoint,ypos,xpos_scaled,ypos_scaled
0,65720,DATA-447,Set up Provisioning of SnowFlake and Thoughtsp...,{'self': 'https://sedexsolutions.atlassian.net...,Done,12059,Story,10587,12757,DATA,...,24,21.0,579,555.0,578.0,"[555, 556, 557, 558, 559, 560, 561, 562, 563, ...",567.0,567.0,0.5,0.872308
1,67309,DATA-1077,node_org End-to-end,{'self': 'https://sedexsolutions.atlassian.net...,Done,12059,Task,10588,12757,DATA,...,28,11.0,174,146.0,173.0,"[146, 147, 148, 149, 150, 151, 152, 153, 154, ...",160.0,171.0,0.5,0.263077
2,67314,DATA-1082,node_site End-to-end,{'self': 'https://sedexsolutions.atlassian.net...,In Progress,12058,Task,10588,12757,DATA,...,28,11.0,174,146.0,173.0,"[146, 147, 148, 149, 150, 151, 152, 153, 154, ...",160.0,166.0,0.5,0.255385
3,67323,DATA-1091,node_tradingrel End-to-end,{'self': 'https://sedexsolutions.atlassian.net...,On hold,12091,Task,10588,12757,DATA,...,28,11.0,174,146.0,173.0,"[146, 147, 148, 149, 150, 151, 152, 153, 154, ...",160.0,170.0,0.5,0.261538
4,67331,DATA-1099,node_supplier_site End-to-end,{'self': 'https://sedexsolutions.atlassian.net...,Done,12059,Task,10588,12757,DATA,...,28,11.0,174,146.0,173.0,"[146, 147, 148, 149, 150, 151, 152, 153, 154, ...",160.0,169.0,0.5,0.26
5,67341,DATA-1109,audit_finding End-to-end,{'self': 'https://sedexsolutions.atlassian.net...,In Progress,12058,Task,10588,12757,DATA,...,19,24.0,613,594.0,612.0,"[594, 595, 596, 597, 598, 599, 600, 601, 602, ...",603.0,598.0,0.5,0.92
6,67343,DATA-1111,audit End-to-end,{'self': 'https://sedexsolutions.atlassian.net...,On hold,12091,Task,10588,12757,DATA,...,19,24.0,613,594.0,612.0,"[594, 595, 596, 597, 598, 599, 600, 601, 602, ...",603.0,594.0,0.5,0.913846
7,65707,IA-27,ThoughtSpot - User groups,,In Progress,12052,Task,10583,12754,IA,...,22,20.0,555,533.0,554.0,"[533, 534, 535, 536, 537, 538, 539, 540, 541, ...",544.0,547.0,0.576923,0.841538
8,65709,IA-29,ThoughtSpot - Git workflow..,,To Do,12051,Task,10583,12754,IA,...,22,20.0,555,533.0,554.0,"[533, 534, 535, 536, 537, 538, 539, 540, 541, ...",544.0,553.0,0.038462,0.850769
9,65820,IA-39,Delete all TS connections not the AD connection,,Completed,12061,Task,10583,12754,IA,...,22,20.0,555,533.0,554.0,"[533, 534, 535, 536, 537, 538, 539, 540, 541, ...",544.0,548.0,0.576923,0.843077


After calculating ypos band positions, nodeDf is 54 long
After calculating yband heights, nodeDf is 54 long
After ypos initialisation, nodeDf is 54 long
nodeDf is 54 long, and we are searching over 54
Generating y position for node 0 of 54
Got current options string
Turned it into a dict
Turned new options into a dict too
Updated my options
Turned it back into a JSON string
And updated the visualisation successfully


In [None]:
df[['key','title','status_name','epic_title','Strategy Macro Strand','Strategy Micro Strand']].to_csv('Roadmap Export to verify.csv')