# Turn the Jira cards constituting the Data Roadmap into a Network Graph
Run "Data Roadmap 4 Create Unified List of Jira Cards" first so that this notebook picks up the latest view of the cards.

After that, you should be able to just hit "Run all".

In [14]:
import pandas as pd
import json

pd.options.display.max_rows = 999
import numpy as np
import re
import pickle
from atlassian import Jira

import networkx as nx
import plotly.graph_objects as go
from plotly.validators.scatter.marker import SymbolValidator
from pyvis.network import Network 
import pyvis.options as pyvis_options


# Load the pickled Jira connection settings from a local credentials file and
# build the `jira` client from them.
# NOTE(review): the path is an absolute, user-specific Windows path, so this cell
# only runs on that machine - consider a configurable location.
# NOTE(review): pickle.load can execute arbitrary code; only load a file you created yourself.
with open(r'C:\Users\steve.waterman\Python\creds\Jira', 'rb') as handle:
    mycon = pickle.load(handle)
# NOTE(review): exec() interpolates the unpickled value into source code and runs it.
# Presumably `mycon` is a string of keyword arguments for Jira(...) - TODO confirm.
# If it can be stored as a dict instead, `jira = Jira(**mycon)` would avoid exec entirely.
exec(f'jira = Jira({mycon})')
from pprint import pprint

import sys
sys.path.append(r'C:\Users\steve.waterman\Python\read_bitbucket')
import lineage_vis as lv
import Useful_Jira as uj

In [15]:
# Load the roadmap card data. The pickle holds a 3-tuple, unpacked here into
# rdmp_tickets, rdmp_tickets_succinct and rdmp_df.
# Presumably this file is written by the notebook named in the introduction - TODO confirm.
# The context manager closes the file handle as soon as loading finishes
# (the previous bare open() left it to the garbage collector).
# NOTE(review): pickle.load can execute arbitrary code - only load files produced by this pipeline.
with open("3-Jira_DataRoadmapCards.pickle", "rb") as cards_file:
    rdmp_tickets, rdmp_tickets_succinct, rdmp_df = pickle.load(cards_file)

# CREATE EDGES
1) Using relationships
2) For subtasks, linking them to the parent 

In [16]:
# Distinct card keys present in the roadmap frame; handed to the helper as ticket_keys.
roadmap_keys = list(rdmp_df['key'].unique())

# Build the edge list with the project helper.
all_edges = uj.jira_linked_issues_as_edges(
    rdmp_tickets,
    rdmp_tickets_succinct,
    ticket_keys=roadmap_keys,
)

# Show the first edge as a quick sanity check.
all_edges[0]

('CN-2606', 'CN-2684')

# Now turn the cards and edges into a graph (or into one graph per substream)

### Node formatting

In [17]:
# Create node DF: one row per Jira card, plus the display attributes added below.
node_df = rdmp_df.copy()

# Name - the Jira key doubles as the node identifier
node_df['name'] = node_df['key']

# Colour - one hex colour per Jira status.
# (The original literal listed 'Paused / Waiting signoff' twice; a repeated key in a
# dict literal silently overwrites the earlier one, so it now appears once.)
colour_status = {
    # Not started
    'To Do': '#d50045',
    'To do (imminent)': '#d50045',
    # Started, paused or under review
    'Paused / Waiting signoff': '#354670',
    'Paused': '#354670',
    'In Progress': '#354670',
    'On hold': '#354670',
    'Under Review': '#354670',
    'Exploration - Done': '#354670',
    'Exploration - Ready for Dev': '#354670',
    'Exploration - Ready for Refinement': '#354670',
    'Exploration - In Progress': '#354670',
    'Dev - Review': '#354670',
    'Dev - In Progress': '#354670',
    'Dev - Ready for Release': '#354670',
    # Finished
    'Done': '#009590',
    'Completed': '#009590',
    'Dev - Done (Deployed)': '#009590',
    # Dropped
    'No longer relevant': '#005251',
}
status_colours = node_df['status_name'].map(colour_status)
# A status missing from the lookup still raises KeyError (as before), but the
# message now lists every unmapped status at once so the dict can be fixed in one go.
unmapped_statuses = node_df.loc[status_colours.isna(), 'status_name'].unique().tolist()
if unmapped_statuses:
    raise KeyError(f'No colour defined in colour_status for status(es): {unmapped_statuses}')
node_df['color'] = status_colours

# Shape - cards labelled 'Milestone' are drawn as ellipses, everything else as boxes.
# Anything that is not a list (e.g. a missing value) is normalised to an empty list first.
node_df['labels'] = node_df['labels'].apply(lambda x: x if isinstance(x, list) else [])
node_df['shape'] = node_df['labels'].apply(
    lambda card_labels: 'ellipse' if 'Milestone' in card_labels else 'box'
)

# Size - constant starting value
node_df['size'] = 3

# y-band
node_df['y_band'] = node_df['Strategy Micro Strand']

# node label
node_df['label'] = node_df['title']

# hovertext: "<title> (<key>)", then the status, then the description, separated by <br>
node_df['text'] = node_df.apply(
    lambda row: row['title'] + ' (' + row['key'] + ')<br>'
    + 'Status: ' + row['status_name'] + '<br>'
    + str(row['description']),
    axis=1,
)
node_df.head()


Unnamed: 0,id,key,title,assignee_name,status_name,status_id,issuetype_name,issuetype_id,project_id,project_key,...,Dataset,step_list,Step,name,color,shape,size,y_band,label,text
0,62222,DATA-2,Sort out dupes in transform.org_trading_rels,{'self': 'https://sedexsolutions.atlassian.net...,To Do,12057,Story,10587,12757,DATA,...,,[],,DATA-2,#d50045,box,3,General,Sort out dupes in transform.org_trading_rels,Sort out dupes in transform.org_trading_rels ...
1,62004,DATA-4,Develop all Postgress Feeds from kafka,{'self': 'https://sedexsolutions.atlassian.net...,Done,12059,Story,10587,12757,DATA,...,,[],,DATA-4,#009590,box,3,General,Develop all Postgress Feeds from kafka,Develop all Postgress Feeds from kafka (DATA-4...
2,62784,DATA-5,change snowflake zero copy clone naming conven...,{'self': 'https://sedexsolutions.atlassian.net...,Done,12059,Story,10587,12757,DATA,...,,[],,DATA-5,#009590,box,3,General Tasks,change snowflake zero copy clone naming conven...,change snowflake zero copy clone naming conven...
3,62005,DATA-7,Deploy all feeds into production once feeds live,{'self': 'https://sedexsolutions.atlassian.net...,Done,12059,Story,10587,12757,DATA,...,,[],,DATA-7,#009590,box,3,General,Deploy all feeds into production once feeds live,Deploy all feeds into production once feeds li...
4,62319,DATA-8,Create view to identify test data,,To Do,12057,Story,10587,12757,DATA,...,,[],,DATA-8,#d50045,box,3,user-management Domain,Create view to identify test data,Create view to identify test data (DATA-8)<br>...


## Create edge object

In [18]:
# Edge table: one row per edge in all_edges, with its two endpoints in 'From' and 'To'.
edge_df = pd.DataFrame(
    data=all_edges,
    columns=['From', 'To'],
)


## Create graph object

In [19]:
# Build the directed graph: nodes first, then edges, printing the node count as we go.
G = nx.DiGraph()
print(G.number_of_nodes())

G.add_nodes_from(node_df['name'])
print(G.number_of_nodes())

# Only the first two items of each edge record are used as the edge's endpoints.
edge_pairs = [(edge[0], edge[1]) for edge in all_edges]
G.add_edges_from(edge_pairs)

graph_node_count = G.number_of_nodes()
frame_node_count = len(node_df)
print(f'{graph_node_count} nodes in graph and {frame_node_count} nodes in node_df')
if graph_node_count != frame_node_count:
    # Adding edges creates any endpoint that is not already a node;
    # list the graph nodes that have no row in node_df.
    print(list(set(G.nodes) - set(node_df['name'])))

lv.check_is_DAG(G)

0
2027
2027 nodes in graph and 2027 nodes in node_df
We have a directed acyclic graph - continue


# Check and format for visualisation

In [20]:
# Sanity check: print every graph node that has no row in node_df
# (no output means every graph node is accounted for).
# The names are collected into a set once, instead of rebuilding and scanning a
# list on every iteration, which made the loop quadratic in the number of nodes.
known_names = set(node_df['name'])
for graph_node in G.nodes():
    if graph_node not in known_names:
        print(graph_node)

In [21]:
# Reverse sanity check: print every node_df name that is not a node of the graph
# (no output means every row made it into the graph).
missing_from_graph = [frame_name for frame_name in node_df['name'] if frame_name not in G.nodes()]
for frame_name in missing_from_graph:
    print(frame_name)

In [22]:
# Row count followed by distinct-name count; the two numbers match when names are unique.
total_rows = len(node_df)
distinct_names = node_df['name'].drop_duplicates()
print(total_rows)
print(len(distinct_names))

2027
2027


## Size based on interconnectivity

In [23]:
# Add graph statistics to node_df via the project helper.
node_df = lv.vis1_graphDescriptiveStats(node_df, G, showtiming=False)


def _reachable_after(node_name):
    """Return every node reachable from node_name by following edges forwards."""
    return list(nx.descendants(G, node_name))


def _reachable_before(node_name):
    """Return every node from which node_name can be reached."""
    return list(nx.ancestors(G, node_name))


def _size_from_dependents(dependents):
    """Return 3 for a node with no dependents, otherwise log(number of dependents) + 3."""
    dependent_count = len(dependents)
    if dependent_count > 0:
        return np.log(dependent_count) + 3
    return 3


# Full downstream / upstream closure of every node.
node_df['all_dependents'] = node_df['name'].apply(_reachable_after)
node_df['all_ancestors'] = node_df['name'].apply(_reachable_before)

# Scale size relative to the number of (transitive) successors.
# (An earlier degree-centrality-based sizing is no longer used.)
node_df['size'] = node_df['all_dependents'].apply(_size_from_dependents)


######################################################################
# VALIDATE & fill in blanks
######################################################################
node_df, edge_df = lv.vis2_validate_dfs(node_df, G, edge_df, showtiming=False)
print(len(node_df))

After getting some node attributes, node_df is 2027 long
No colour field specified in edge_df. Default value of "grey" will be used.
              Please pass a field called "size" to manage
No size/line weight field specified in edge_df. Default value of "1" will be used.
              Please pass a field called "size" to manage
No text field specified in edge_df to annotate edges. Default value of "" will be used.
              Please pass a field called "size" to manage
No label field passed to edge_df. It will be blank by default
2027


### Flag if anything is blocking the node

In [24]:
### Numerical status representation - DONE flag
# Statuses that count as finished when deciding whether a card is still blocked.
# 'Completed' and 'Dev - Done (Deployed)' are included because the node colour map
# in this notebook gives them the same colour as 'Done'; previously they were
# omitted, so cards whose predecessors were in those statuses were reported as blocked.
# NOTE(review): confirm with the roadmap owners that these two statuses mean "finished".
DONE_STATUSES = {'Done', 'No longer relevant', 'Completed', 'Dev - Done (Deployed)'}
node_df['DoneFlag'] = node_df['status_name'].isin(DONE_STATUSES).astype('int64')

# One row per (node, predecessor) pair, joined to the predecessor's DoneFlag.
# The minimum flag per node is 1 only when every matched predecessor is done.
# Despite its name, 'All_Dependents_Done' describes the node's predecessors.
predecessor_links = node_df.loc[:, ['name', 'predecessors']].explode('predecessors')
dependents_status = (
    predecessor_links
    .merge(
        node_df.loc[:, ['name', 'DoneFlag']],
        how='inner',
        left_on='predecessors',
        right_on='name',
        suffixes=('', '_pred'),
    )
    .groupby('name')['DoneFlag']
    .min()
    .reset_index()
    .rename(columns={'DoneFlag': 'All_Dependents_Done'})
)

print(len(node_df))
# Drop the column if a previous run of this cell already added it, so that re-running
# does not produce _x/_y duplicates. errors='ignore' replaces the former bare
# `except: pass`, which would also have hidden unrelated failures.
node_df = node_df.drop(columns=['All_Dependents_Done'], errors='ignore')
node_df = node_df.merge(dependents_status, on='name', how='left')
print(len(node_df))


def isblocked(row):
    """Return True when the node in `row` has at least one predecessor that is not done.

    A node with no predecessors is never blocked. A node whose
    'All_Dependents_Done' value is 1 is not blocked. Anything else - including a
    missing value - counts as blocked.
    """
    if len(row['predecessors']) == 0:
        return False
    return row['All_Dependents_Done'] != 1


node_df['isBlocked'] = node_df.apply(isblocked, axis=1)



2027
2027


In [25]:
# Persist the node table, edge table and graph together as one tuple.
# The context manager guarantees the file is flushed and closed once the dump
# completes (the previous bare open() never closed the handle explicitly).
with open("3-Jira_DataRoadmapLatest.pickle", "wb") as latest_file:
    pickle.dump((node_df, edge_df, G), latest_file)

In [26]:
# Completion marker: when the notebook is run top to bottom, this output shows the last cell was reached.
print('All run!')

All run!
