# Set up:
1) Modules
2) Credentials & keys for Trello and Google Sheets
3) Load Trello board
4) Configure pandas output

In [54]:
# Set up modules
import pandas as pd
import sys
import requests
from xlsxwriter.utility import xl_rowcol_to_cell
sys.path.append(r'/home/jupyter/reusable_code')
import google_api_functions as gaf
import trello_generic as tg
import sqlite3
from google.cloud import bigquery # To run BQ statements
import re
from datetime import datetime
import numpy as np

import plotly.graph_objects as go
import networkx as nx
from plotly.validators.scatter.marker import SymbolValidator
import random

import requests
import json
# Local SQLite database: open the file-backed connection (sqlite3 creates the
# file when it does not exist yet).
conn = sqlite3.connect('SQL_connection1.db')

# Google credentials, obtained through the shared helper module.
creds = gaf.Authenticate_Google(r'/home/jupyter/reusable_code/')

# Trello credentials: the pickle file is unpacked into (key, secret, token).
from trello import TrelloClient
trelloUserCreds = tg.readTrelloCredsFromFile(
    r'/home/jupyter/reusable_code/trellocreds.pickle')
mykey, mysecret, mytoken = trelloUserCreds

client = TrelloClient(api_key=mykey, api_secret=mysecret, token=mytoken)

# Look the boards up by name. Each call is unpacked into
# (board object, board id, credentials object); the credentials object is
# what the tg helpers further down take as their first argument.
dataBoard, dataBoard_id, dataBoard_creds = tg.Return_board_by_name(
    mykey, mysecret, mytoken, "Data 2021")
researchBoard, researchBoard_id, researchBoard_creds = tg.Return_board_by_name(
    mykey, mysecret, mytoken, "Research 2021")
#oldBoard,oldBoard_id,oldBoard_creds=tg.Return_board_by_name(mykey,mysecret,mytoken,"Insights & Data")

# Widen the pandas display limits used for notebook output.
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 500)
# Read Trello board into a DataFrame

In [55]:
# Read the data board into a DataFrame plus a list of per-card dicts,
# leaving out the template, ideation and closed lists.
dataCard_df, dataCard_list = tg.cards_to_dataframe(
    dataBoard_creds,
    checklist_options=None,
    labels_as_binary_flags=True,
    label_colours=True,
    comment_names=False,
    get_attachments=True,
    card_number_cutoff=10000,
    lists_to_exclude=['Template(s)', 'Ideation', 'No longer required', 'Completed'],
)

In [56]:
# Notebook preview: show the first five card records
dataCard_list[:5]

# Turn "New Insight Brief(s)" into proper cards

In [57]:
def monthToNum(shortMonth):
    """Return the zero-padded month number for a three-letter month name.

    Example: 'Sep' -> '09'. The lookup is case-sensitive and raises
    KeyError for anything other than 'Jan' .. 'Dec'.
    """
    abbreviations = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    lookup = {abbr: '{:02d}'.format(position)
              for position, abbr in enumerate(abbreviations, start=1)}
    return lookup[shortMonth]


def Process_New_Insight_Briefs():
    """Turn raw "New Insight Brief" cards into properly populated cards.

    Reads the module-level ``dataCard_list`` and, for every card named
    'New Insight Brief' that sits in one of the active lists, parses the
    form text held in the card description and then:

    * renames the card to the project name,
    * sets the due date and the 'Hard Deadline' custom field,
    * adds the submitting team's label and the supported-project label(s),
    * fills the 'Type', 'Blockers/ Dependencies 1' and 'Blocker 1 Due Date'
      custom fields,
    * rewrites the description, and
    * moves 'Research' briefs to the research board.

    Returns:
        The list of card dicts that were processed.

    Raises:
        TypeError: if the description has no project name, 'Needed by',
            '<type> brief' or 'Submitted by' line (the regex search returns
            None, which is then subscripted).
        IndexError: if the description does not contain 'Project Detail'.
    """
    active_lists = ['New Projects (need prioritisation)', 'Backlog', 'Prioritised', 'In Progress']
    New_briefs = [card for card in dataCard_list
                  if card['Name'] == 'New Insight Brief' and card['List'] in active_lists]
    labellist = dataBoard.get_labels()

    for i in New_briefs:
        # Take the bit before the generic email signature
        CardInfo = re.split('Sent via Google Form Notifications', i['Description'])[0]
        print(CardInfo)

        #############################
        # Reset card name
        #############################
        i['Card Object'].set_name(re.search(r'\*Project Name\*: \*(.*)\*', CardInfo)[1])

        #############################
        # Set Due Date
        #############################
        DeadlineInfo = re.search(r'\*Needed by: \*(.*)', CardInfo)[1]
        # Reset for every card: otherwise an unparseable deadline would fall
        # through to the date parsed for the previous card and write that
        # stale value into this card's 'Hard Deadline' field.
        DueDate = None
        try:
            DueDate = datetime.strptime(DeadlineInfo[:12], '%b %d, %Y')
            i['Card Object'].set_due(DueDate)
        except Exception:
            # Best effort: a free-text deadline simply leaves the due date unset
            pass

        if DueDate is not None:
            try:
                DueText = str(DueDate)[:10]  # keep the YYYY-MM-DD part
                tg.Update_custom_field(dataBoard_creds, i['Trello ID'], 'Hard Deadline', DueText)
            except Exception:
                pass

        #############################
        # Set labels for team
        #############################
        team_match = re.search(r'\*Submitted by\*: .* in the (.+) team', CardInfo)
        if team_match is not None:
            Team = team_match[1]
            try:
                LabelObject = [label for label in labellist if label.name == Team][0]
                i['Card Object'].add_label(LabelObject)
            except Exception:
                print('Could not add label for team: "{}"'.format(Team))

        #############################
        # Set labels for priorities/ projects
        #############################
        projects = re.findall(r'\*Supports projects\*: (.*)', CardInfo)
        if len(projects) > 0:
            # NOTE(review): "." does not match a newline, so projects[0] is a
            # single line and this split yields one item - confirm the
            # separator the form actually uses for several projects.
            projectList = re.split('\n', projects[0])
            for project in projectList:
                try:
                    LabelObject = [label for label in labellist if label.name == project][0]
                    i['Card Object'].add_label(LabelObject)
                except Exception:
                    print('Could not add label for project: "{}"'.format(project))

        #############################
        # Set custom field (type)
        #############################
        WorkType = re.search(r'\*(.*) brief\*', CardInfo)[1]
        if WorkType != 'No idea- you tell me':  # If unknown leave blank
            try:
                tg.Update_custom_field(dataBoard_creds, i['Trello ID'], 'Type', WorkType)
            except Exception:
                print('Could not update Type field with value: {}'.format(WorkType))

        #############################
        # Set custom field (Blocker) - optional, so failures are ignored
        #############################
        try:
            Blockerinfo = re.search(r'\*Blocker: \*(.*)? which should be resolved by', CardInfo)[1]
            tg.Update_custom_field(dataBoard_creds, i['Trello ID'], 'Blockers/ Dependencies 1', Blockerinfo)
        except Exception:
            pass

        # Set custom field (Blocker date), rebuilt as YYYY-MM-DD from text
        # shaped like 'Mon DD, YYYY'
        try:
            Blockerdatetext = re.search(r'\*Blocker: \*.* which should be resolved by (.+)', CardInfo)[1]
            BlockerDate = '-'.join([Blockerdatetext[8:12], monthToNum(Blockerdatetext[0:3]), Blockerdatetext[4:6]])
            tg.Update_custom_field(dataBoard_creds, i['Trello ID'], 'Blocker 1 Due Date', BlockerDate)
        except Exception:
            pass

        # Redo description: project detail, then submitter (taken from the
        # e-mail address with dots turned into spaces) and the deadline text
        NewDesc = (re.split('Project Detail', CardInfo)[1] + '\n Submitted By: '
                   + re.search(r'\*Submitted by\*: (.*)@...', CardInfo)[1].replace('.', ' ')
                   + '\nRequired by: ' + DeadlineInfo)
        i['Card Object'].set_description(NewDesc)

        # If research move to research board
        if WorkType == 'Research':
            tg.MoveCard(i['Card Object'], '5fe35ef42dd5616a3e37bc12', mykey, mytoken, boardid=researchBoard_id)

    return New_briefs
   

In [58]:
# Process the outstanding briefs; the returned list holds the cards that were handled
briefs=Process_New_Insight_Briefs()

# Assign card numbers

In [59]:
# Give every card without a "#<number>" reference in its name the next free
# number, continuing from the highest number already used on the board.
cardlist = dataBoard.get_cards()  # Return all cards on board

card_number_pattern = re.compile(r'\#(\d{1,4})')

cardnums = []           # numbers already present on the board
cards_without_num = []  # cards that still need one
for card in cardlist:
    found = card_number_pattern.search(card.name)
    if found is None:
        cards_without_num.append(card)
    else:
        cardnums.append(int(found[1]))

# default=0 lets a board with no numbered cards start at #1 instead of
# failing on the maximum of an empty list.
max_cardnum = max(cardnums, default=0)

# Loop cards without a number and add one to their name
for n, i in enumerate(cards_without_num):
    new_name = '#' + str(n + 1 + max_cardnum) + ' ' + i.name
    i.set_name(new_name)
    # Print the name that was sent rather than rebuilding it from i.name,
    # which would show the prefix twice if set_name also updates the object.
    print(new_name)

In [60]:
# Re-read the board so the frame reflects the renamed and renumbered cards
# (label colours are left out of the label text this time).
dataCard_df, dataCard_list = tg.cards_to_dataframe(
    dataBoard_creds,
    checklist_options=None,
    labels_as_binary_flags=True,
    label_colours=False,
    comment_names=False,
    get_attachments=True,
    card_number_cutoff=10000,
    lists_to_exclude=['Template(s)', 'Ideation', 'No longer required', 'Completed'],
)

## Copy to BigQuery

In [61]:
# Fresh Google credentials, then a BigQuery client and a reference to the
# destination table in the sandbox dataset.
creds = gaf.Authenticate_Google(r'/home/jupyter/reusable_code/')
bq = bigquery.Client(
    project='itv-bde-analytics-dev',
    credentials=creds,
)
dataset = bq.dataset('britbox_sandbox')
table_ref = dataset.table("SW_Data_Workstack")

In [62]:
# Work on a copy so the frame used by the rest of the notebook is untouched.
df_for_bq = dataCard_df.copy()

# Most columns are "object" dtype (mixed types). Convert the date columns
# explicitly, otherwise the load breaks when it expects bytes and finds a
# datetime.
for date_column in ('Due Date', 'Card Created Date', 'Hard Deadline', 'Blocker 1 Due Date'):
    df_for_bq[date_column] = pd.to_datetime(df_for_bq[date_column].astype(str))

# Arrays of STRUCTs with differing datatypes do not load cleanly, so these
# columns are left out for now.
df_for_bq = df_for_bq.drop(columns=['listMovementHistory', 'listMovementSummary', 'coordinates'])

# Make the column names valid for BigQuery: spaces become underscores and
# the characters / ? - & : ( ) are removed.
bq_name_table = str.maketrans(' ', '_', '/?-&:()')
newcol_names = {x: x.translate(bq_name_table) for x in df_for_bq.columns}
df_for_bq = df_for_bq.rename(columns=newcol_names)

# Blank labels on the board can produce blank column names; keep the rest.
df_col = [i for i in df_for_bq.columns if len(i) > 0]
df_for_bq = df_for_bq[df_col]

In [63]:
# Notebook preview of the frame that will be loaded into BigQuery
df_for_bq.head()

In [64]:
# Replace the table wholesale: drop any previous copy, then load the frame.
# not_found_ok=True covers the first run, when the table does not exist yet.
# Any other failure (e.g. permissions) is now raised instead of being
# swallowed, so the load never runs on top of a table that failed to delete.
bq.delete_table(table_ref, not_found_ok=True)
job = bq.load_table_from_dataframe(df_for_bq, table_ref)

job.result()  # Waits for table load to complete.
print("Loaded dataframe to {}".format(table_ref.path))

In [593]:
# Notebook preview: the cleaned column names sent to BigQuery
df_for_bq.columns

# Tidy up DataFrame to include only the records and columns of interest

In [65]:
# Build SW_Data_Workstack1 from the freshly loaded SW_Data_Workstack table:
#  * keep the 30 listed columns, dropping the 'Template(s)' and
#    'No longer required' lists,
#  * split the pipe-separated Labels text into "name:colour" items, and
#  * collect the label names into three arrays by colour
#    (green -> Teams, yellow -> Priorities, blue -> TeamObjectives).
# The GROUP BY ordinals 1-30 correspond to the 30 columns selected in "x"
# (everything except labels1); keep the two in step when adding columns.
# NOTE(review): the CROSS JOIN UNNEST presumably drops cards whose Labels
# value is NULL - confirm whether unlabelled cards should survive.
query="""
create or replace table `itv-bde-analytics-dev.britbox_sandbox.SW_Data_Workstack1` as
with x  as (select 
Name
,Description
,List
,Card_Number
,Due_Date
,Trello_ID
,Trello_URL
,Card_Created_Date
,Comments
,Trello_attachments
,Other_attachments
,Blocker_1_Due_Date
,Supported_by
,Ad_hoc
,Assigned_to
,Blockers_Dependencies_1
,Type
,Subtype
,Blockers_Dependencies_2
--,Project_Brief_Location
,Paused_or_Blocked
,Mark_for_Deletion
,Hard_Deadline
,EPIC
--,isEPIC

,currentListTimeSpent
,currentListTimesEntered
,currentListFirstEntered
,currentListLastEntered
,listpos
,cardpos
,boardpos

,split(Labels,'|') as labels1
from `itv-bde-analytics-dev.britbox_sandbox.SW_Data_Workstack`
where list not in ('Template(s)','No longer required')
)

select x.* except (labels1)
, array_agg(case when trim(split(labels2,':')[safe_offset(1)])='green' then 
trim(split(labels2,':')[safe_offset(0)]) end ignore nulls) as Teams
, array_agg(case when trim(split(labels2,':')[safe_offset(1)])='yellow' then 
trim(split(labels2,':')[safe_offset(0)]) end ignore nulls) as Priorities
, array_agg(case when trim(split(labels2,':')[safe_offset(1)])='blue' then 
trim(split(labels2,':')[safe_offset(0)]) end ignore nulls) as TeamObjectives
from x
cross join unnest (labels1) as labels2
group by 1,2,3,4,5,6,7,8,9,10
,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
;"""
# Run the statement and wait for it to finish.
# NOTE(review): the statement is a CREATE OR REPLACE TABLE, so the frame
# returned here is presumably empty - query the new table to inspect rows.
df = bq.query(query).to_dataframe()

In [66]:
# Notebook preview of the frame returned by the query above
df

### Post-prioritisation session admin

##### Move all cards from New to Backlog

In [None]:

# Find the source and destination lists by name (IndexError if either is
# missing), then move every card across in one call.
from_list = [
    board_list for board_list in dataBoard.list_lists()
    if board_list.name == 'New Projects (need prioritisation)'
][0]
to_list = [
    board_list for board_list in dataBoard.list_lists()
    if board_list.name == 'Backlog'
][0]
from_list.move_all_cards(to_list)

##### Move all cards from Recently Completed to Completed

In [595]:

# Find the source and destination lists by name (IndexError if either is
# missing), then move every card across in one call.
from_list = [
    board_list for board_list in dataBoard.list_lists()
    if board_list.name == 'Recently Completed'
][0]
to_list = [
    board_list for board_list in dataBoard.list_lists()
    if board_list.name == 'Completed'
][0]
from_list.move_all_cards(to_list)

## One off Updates

### Set Hard Deadlines for historically processed cards

In [97]:
# One-off backfill: for cards whose description already carries a
# "Required by: <date>" line, copy that date into the 'Hard Deadline' field.
for i in dataCard_list:
    DeadlineInfo = re.search('Required by: (.*)', i['Description'])
    if DeadlineInfo is None:
        continue

    print(DeadlineInfo[1])
    try:
        # Only the first 12 characters ('Mon DD, YYYY') are parsed
        DueDate = datetime.strptime(DeadlineInfo[1][:12], '%b %d, %Y')
        print(DueDate)
        DueText = str(DueDate)[:10]  # YYYY-MM-DD
        print(DueText)
        tg.Update_custom_field(dataBoard_creds, i['Trello ID'], 'Hard Deadline', DueText)
        print('Updated to {}'.format(DueText))
    except:
        # Best effort: cards with an unparseable date are left as they are
        pass
    

In [99]:
# One-off clean-up: blank the 'Hard Deadline' field on the first 23 cards in the list
for i in dataCard_list[:23]:
    tg.Update_custom_field(dataBoard_creds,i['Trello ID'],'Hard Deadline','')

### One-off: assign members using the "Assigned to" field, so timelines can be grouped by member

In [223]:
# Board members, keyed by their full name for lookup further down
Members=dataBoard.get_members()
MemberDict={i.full_name:i for i in Members}
# Notebook preview of the lookup
MemberDict

In [218]:
# Distinct, non-null values of the "Assigned to" field
assigned = dataCard_df['Assigned to']
assignees = list(assigned[assigned.notna()].unique())

# Map each assignee to a board member's full name by comparing the first
# three letters, case-insensitively. Unmatched assignees stay None; when
# several members share a prefix the last one wins.
assignee_dict = dict.fromkeys(assignees)
for i in assignee_dict:
    prefix = i[:3].lower()
    for j in MemberDict:
        if j[:3].lower() == prefix:
            print(i, j)
            assignee_dict[i] = j
assignee_dict


In [226]:

# For every card with an "Assigned to" value, make sure the matching board
# member is actually a member of the card; add them when they are missing.
query = {'key': mykey, 'token': mytoken}

for i in dataCard_list:

    if 'Assigned to' in i:
        print(i['Name'])
        print(i['Assigned to'])

        # .get(): an assignee absent from the lookup is treated as unmatched
        Member_needed = assignee_dict.get(i['Assigned to'])
        print(Member_needed)

        if Member_needed is None:
            # No board member matched this assignee; MemberDict[None] would
            # raise KeyError, so report it and move on to the next card.
            print('No matching board member - skipped')
        else:
            url = "https://api.trello.com/1/cards/{}/members".format(i['Trello ID'])
            response = requests.request("GET", url, params=query)
            if response.ok:
                responses = json.loads(response.text)
                current_members = [member['fullName'] for member in responses]
                if Member_needed in current_members:
                    print('Ok')
                else:
                    print('Missing')
                    i['Card Object'].add_member(MemberDict[Member_needed])
            else:
                # Surface the failure instead of silently skipping the card
                print('Could not read card members (HTTP {})'.format(response.status_code))

    print(" ")

### Populate the start date as the date the card entered the "In Progress" list, and the end date as the date it entered "Complete"

In [15]:
# For each card currently in 'In Progress', print when it entered and left
# that list according to its list-movement summary.
for i in dataCard_list:
    if i['List'] != 'In Progress':
        continue
    print(i['Name'])
    df = pd.DataFrame(i['listMovementSummary'])
    in_progress_rows = df['enteredList'] == 'In Progress'
    print(df[['enteredList', 'FirstEntered', 'LastExited']][in_progress_rows])
    print(" ")

# Get dependencies

In [21]:
# Notebook preview: cards that have something recorded in the first blockers/dependencies field
dataCard_df[['Name','Blockers/ Dependencies 1','Blockers/ Dependencies 2']][dataCard_df['Blockers/ Dependencies 1'].notna()]

# Map EPICs

#### Import all cards including "completed"
(NB the checklist import isn't working)

In [177]:
# Re-read the board, this time keeping the 'Completed' list.
dataCard_df, dataCard_list = tg.cards_to_dataframe(
    dataBoard_creds,
    checklist_options=None,
    labels_as_binary_flags=True,
    label_colours=True,
    comment_names=False,
    get_attachments=True,
    card_number_cutoff=10000,
    lists_to_exclude=['Template(s)', 'Ideation', 'No longer required'],
)

#### Identify EPICs (as cards with the isEPIC label applied, rather than using the "EPIC" field or being in the "EPIC" list)

In [178]:
# EPICs are the cards whose 'isEPIC' flag is present and True.
EPICS = [
    card_record for card_record in dataCard_list
    if 'isEPIC' in card_record and card_record['isEPIC'] == True
]
#EPICS

#### Create EPIC labels

In [179]:

# Make sure the board has an "EPIC: <name>" label for every EPIC card.
labellist = dataBoard.get_labels()
existing_label_names = {label.name for label in labellist}

for i in EPICS:
    # Strip the leading "#<number> " reference from the card name
    EPIC_Name = re.sub(r'\#(\d{1,4}) ', '', i['Name'])
    label_name = 'EPIC: {}'.format(EPIC_Name)
    if label_name not in existing_label_names:
        new_label = dataBoard.add_label(label_name, color='lime')
        # Keep the cached label list in step with the board, so that two
        # EPICs with the same name do not create the label twice and later
        # cells that look labels up in labellist can find the new one.
        existing_label_names.add(label_name)
        if new_label is not None:
            labellist.append(new_label)
        print('Created new label: {}'.format(EPIC_Name))

#### Remove labels that shouldn't be there

In [180]:
# Labels whose name starts with 'EPIC', and the names that are still backed
# by an EPIC card ("EPIC: " + card name without its "#<number> " prefix).
EPIC_labels = [label for label in labellist if label.name.startswith('EPIC')]
accepted_labels = [
    'EPIC: {}'.format(re.sub('\#(\d{1,4}) ', '', epic_card['Name']))
    for epic_card in EPICS
]

# Delete every EPIC label that no longer has a card behind it.
for x in EPIC_labels:
    if x.name in accepted_labels:
        continue
    print(x.name)
    dataBoard.delete_label(x.id)

#### Create a mapping by looping through the checklists on each EPIC card of items assigned to it

In [181]:

# Map each sub-project card URL to its parent EPIC, by reading the
# "Sub Projects" checklist on every EPIC card.
Epic_mapping = {}
for card in EPICS:  # Loop epics
    checklists_to_export = list(card['Card Object'].checklists)

    for m, i in enumerate(checklists_to_export):  # Loop the checklists
        url = "https://api.trello.com/1/checklists/{}/checkItems".format(i.id)
        querystring = {"filter": 'all', "fields": 'all', "key": mykey, "token": mytoken}
        if i.name != 'Sub Projects':
            continue  # only the "Sub Projects" checklist lists child cards

        response = requests.request("GET", url, params=querystring)
        checklist_items = json.loads(response.text)
        if checklist_items is None:
            continue

        for j in checklist_items:
            # Extract the common part of the card URL from the item text
            matched = re.search('https://trello.com/c/.{8}', j['name'])
            try:
                # Store parent URL and EPIC label name against the child URL
                Epic_mapping[matched[0]] = {
                    'URL': card['Trello URL'],
                    'Label': 'EPIC: {}'.format(re.sub('\#(\d{1,4} )', '', card['Name'])),
                }
            except:
                # matched is None when the item text holds no card URL
                print('No match on {}'.format(j))
Epic_mapping

        

In [182]:

# For every mapped sub-project card: record the parent EPIC's URL in the
# 'EPIC' custom field and make sure the EPIC's label is on the card.
for key in Epic_mapping:
    for card in dataCard_list:
        if key == card['Trello URL']:
            print(key, Epic_mapping[key]['URL'])
            tg.Update_custom_field(myboard_creds=dataBoard_creds, cardId=card['Trello ID'], customFieldname='EPIC', value=Epic_mapping[key]['URL'])  # Update the custom field

            wanted_label = Epic_mapping[key]['Label']
            matching_labels = [label for label in labellist if label.name == wanted_label]
            if not matching_labels:
                # labellist may predate the label's creation; report it
                # rather than failing with IndexError on [0].
                print('No label named "{}" found - skipped'.format(wanted_label))
                continue
            LabelObject = matching_labels[0]

            # A card record without a flag for this label is treated as not
            # having the label yet.
            if card.get(LabelObject.name, False) == False:
                try:
                    card['Card Object'].add_label(LabelObject)
                except Exception as err:
                    # The original 'Couldn''t do it' literal printed
                    # "Couldnt do it" (adjacent strings are concatenated).
                    print("Couldn't do it: {}".format(err))
            else:
                print('Already on')

# Project dependency graph

#### Reload all cards (inc. Completes)

In [6]:
# Re-read the board for the dependency graph: completed cards are kept,
# the 'EPICs' list itself is left out.
dataCard_df, dataCard_list = tg.cards_to_dataframe(
    dataBoard_creds,
    checklist_options=None,
    labels_as_binary_flags=True,
    label_colours=True,
    comment_names=False,
    get_attachments=True,
    card_number_cutoff=10000,
    lists_to_exclude=['Template(s)', 'Ideation', 'No longer required', 'EPICs'],
)

#### Get Colour palette 

In [7]:
# Read the colour palette sheet, then build name -> HEX lookups for the
# 'Overall' and 'Internal' palettes.
values, colourDf = gaf.read_google_sheets_as_rows(
    '18t_E2ZBbFxxm32ApsajUMP1KGo--m2aqEO6xy4v4XvY', 'Sheet1', creds)
palette_rows = colourDf[['Colour', 'HEX', 'Colour Palette']].to_dict('records')
colourDictHex = {
    row['Colour']: row['HEX']
    for row in palette_rows if row['Colour Palette'] == 'Overall'
}
colourDictHexInternal = {
    row['Colour']: row['HEX']
    for row in palette_rows if row['Colour Palette'] == 'Internal'
}
#colourDf

#### Create Marker Lookups

In [8]:
# The validator's values are read in groups of three; the third entry of
# each group is used as the key and the first entry as the value.
raw_symbols = SymbolValidator().values
marker_lookup = {
    raw_symbols[start + 2]: raw_symbols[start]
    for start in range(0, len(raw_symbols), 3)
}  # Create a full lookup

# Subset: leave out any symbol whose dash-separated name contains 'open' or 'dot'
marker_lookup_basic = {
    symbol_name: marker_lookup[symbol_name]
    for symbol_name in marker_lookup
    if not ({'open', 'dot'} & set(symbol_name.split('-')))
}

#### Subset the graph database

In [31]:
def Get_EPIC_from_labels(label_string):
    """Return the EPIC named in a pipe-separated label string.

    The value is converted to text and split on '|'; each item is then
    split on ':'. For the first item whose text before the first colon is
    'EPIC' (ignoring surrounding whitespace), the stripped text between the
    first and second colon is returned. If no item qualifies, 'Standalone'
    is returned.
    """
    for item in str(label_string).split('|'):
        pieces = item.split(':')
        if len(pieces) > 1 and pieces[0].strip() == 'EPIC':
            return pieces[1].strip()
    return 'Standalone'
    

# Columns needed for the graph build, copied into a working frame
graph_columns = [
    'Name', 'Description', 'Trello URL', 'List', 'Card Number', 'Labels',
    'Hard Deadline', 'Due Date', 'Type', 'Assigned to', 'EPIC', 'Depends on',
    'Blockers/ Dependencies 1', 'Blockers/ Dependencies 2',
    'Blocker 1 Due Date', 'Blocker 2 Due Date',
]
graphdf = dataCard_df[graph_columns].copy()

# Drop the "#<number> " prefix from the project name for display purposes
graphdf['Name'] = graphdf['Name'].apply(
    lambda card_name: re.sub('\#(\d{1,4} )', '', card_name))

# EPIC taken from the labels, or 'Standalone' when there is none
graphdf['EPIC_Class'] = graphdf['Labels'].apply(Get_EPIC_from_labels)

# "Depends on" is comma-separated text; turn it into a list (None if blank)
graphdf['Dependent'] = graphdf['Depends on'].apply(
    lambda raw: str(raw).split(',') if pd.notnull(raw) else None)

####################################################################################################################################
# Dataframe for Graph Edges
####################################################################################################################################
# Edges come from two places: the "Depends on" card numbers (on-board
# dependencies) and the two blockers/dependencies fields (off-board ones).

# One row per (card, dependency number)
has_dependents = graphdf['Dependent'].notna()
edge_df = graphdf[has_dependents][['Name', 'Dependent']].explode('Dependent')
edge_df['Dependent'] = edge_df['Dependent'].astype(int)  # string -> integer

# Resolve each dependency number to the card it refers to: the depending
# card is the edge's 'To', the card depended upon is its 'From'.
edge_df = (
    edge_df.merge(graphdf, left_on='Dependent', right_on='Card Number')
    [['Name_x', 'Name_y']]
    .rename(columns={'Name_x': 'To', 'Name_y': 'From'})
)

# Off-board dependencies: the blocker text itself is the 'From' end
external_edges = pd.concat([
    graphdf[graphdf[blocker_col].notna()][['Name', blocker_col, 'EPIC_Class']]
    .rename(columns={'Name': 'To', blocker_col: 'From'})
    for blocker_col in ('Blockers/ Dependencies 2', 'Blockers/ Dependencies 1')
])

# Union both kinds of edge
edge_df = pd.concat([edge_df, external_edges[['To', 'From']]])

# (from, to) tuples, ready for NetworkX
edge_df['Tuple'] = list(zip(edge_df['From'], edge_df['To']))


####################################################################################################################################
# Dataframe for Graph Nodes
####################################################################################################################################
# Nodes referenced by the external edges that are not cards on this board
# (BDE projects, etc.): one row per distinct (From, EPIC_Class) pair.
extranodes = external_edges.groupby(['From', 'EPIC_Class']).count().drop(columns=['To']).reset_index()

# Give them the same columns as graphdf; only Name, EPIC_Class and Type are
# filled in, every other column stays empty.
extranodes_df = pd.DataFrame(index=range(0, len(extranodes)), columns=list(graphdf.columns))
extranodes_df['Name'] = extranodes['From']
extranodes_df['EPIC_Class'] = extranodes['EPIC_Class']
extranodes_df['Type'] = 'External'  # hard code the type field

# Stack the on-board cards and the external references into one node frame.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
nodedf = pd.concat([graphdf, extranodes_df], ignore_index=True)



####################################################################################################################################
# Create fields holding the various display properties of the graph (text, colour, shape, etc.)
####################################################################################################################################

############ Hover text
# Blank out missing values before concatenating: adding NaN to a string
# gives NaN, which left external nodes (no URL) with no hover text at all.
hover_parts = nodedf[['Name', 'Trello URL', 'Type', 'EPIC_Class']].fillna('').astype(str)
nodedf['Node_HoverText'] = (
    'Name: ' + hover_parts['Name'] + '<br>'
    + 'URL: ' + hover_parts['Trello URL'] + '<br>'
    + 'Type: ' + hover_parts['Type'] + '<br>'
    + 'EPIC: ' + hover_parts['EPIC_Class']
)



############ Colour (EPICS)

# Create a colour map for use in epics
classes = nodedf['EPIC_Class'].unique()  # List unique values for EPICs
# Number of passes through the palette needed to cover every EPIC (kept for
# reference; the assignment below wraps around with a modulo instead).
loops_round = int(np.ceil(len(classes) / len(colourDictHexInternal)))

# Assign palette colours to EPICs in order, re-using colours from the start
# of the palette once it is exhausted. This gives the same mapping as the
# earlier triple nested loop without rescanning the palette per EPIC.
epic_palette = list(colourDictHexInternal.values())
colourMapEPIC = {
    epic_class: epic_palette[position % len(epic_palette)]
    for position, epic_class in enumerate(classes)
}

nodedf['Node_Colour_EPIC'] = nodedf['EPIC_Class'].apply(lambda x: colourMapEPIC[x])

############ Colour (Status)

# Colour per board list; lists not named here fall back to black.
colourMapList = {
    'Backlog': colourDictHexInternal['Sunset Red'],
    'Hygiene Factors': colourDictHexInternal['Sunset Red'],
    'Prioritised': colourDictHexInternal['Sunset Red Highlight'],
    'In Progress': colourDictHexInternal['Orange'],
    'EPICs': '#000000',
    'Paused': colourDictHexInternal['Raspberry'],
    'Completed': colourDictHexInternal['Apple'],
}

nodedf['Node_Colour_Status'] = nodedf['List'].apply(
    lambda list_name: colourMapList.get(list_name, '#000000'))


############ Markers (EPICS)
# Candidate markers: the basic set minus anything starting with 'triangle'
markers_to_use = {i: marker_lookup_basic[i] for i in marker_lookup_basic if i[:8] != 'triangle'}
classes = nodedf['EPIC_Class'].unique()  # List unique values for EPICs
# Number of passes through the marker set needed to cover every EPIC (kept
# for reference; the assignment below wraps around with a modulo instead).
loops_round = int(np.ceil(len(classes) / len(markers_to_use)))

# Assign markers to EPICs in order, re-using markers from the start of the
# set once it is exhausted. This gives the same mapping as the earlier
# triple nested loop without rescanning the marker set per EPIC.
marker_cycle = list(markers_to_use.values())
markerMapEPIC = {
    epic_class: marker_cycle[position % len(marker_cycle)]
    for position, epic_class in enumerate(classes)
}

nodedf['Node_Marker_EPIC'] = nodedf['EPIC_Class'].apply(lambda x: markerMapEPIC[x])





####################################################################################################################################
# Initialise the network X graph itself
####################################################################################################################################

# Directed graph: one node per name in nodedf, one edge per (From, To) tuple in edge_df
G=nx.DiGraph()
G.add_nodes_from(nodedf['Name'])
G.add_edges_from(edge_df['Tuple'])


#### Classify node type
def classify_node(node,G):
    """Classify a node by its in- and out-degree in graph G.

    Returns 'unconnected' (no edges at all), 'start' (outgoing edges only),
    'terminal' (incoming edges only) or 'interim' (anything else).
    """
    incoming = G.in_degree()[node]
    outgoing = G.out_degree()[node]

    if incoming == 0:
        if outgoing == 0:
            return 'unconnected'
        if outgoing > 0:
            return 'start'
    elif incoming > 0 and outgoing == 0:
        return 'terminal'
    return 'interim'

# Label every node as unconnected / start / terminal / interim
nodedf['nodeType']=nodedf['Name'].apply(classify_node,G=G)


Currently:
- Marker size is not varied
- Edge weight is not varied
- Edge colour is not varied
- Edges are not labelled

# Writing something that positions things such that:
- X axis is distributed according to the longest observed path
- Starts are on the left, Terminals on the right, interims in between
OR
- Starts are on the left, everything is positioned according to the max number of nodes before
- isolated nodes are just distributed evenly along the x-axis in a block


- Y axis can be grouped by a column, positionally. In this example it will be EPIC but you could also group by a dummy column
- Each 'group by' is given the same space
-- For each of those, the within-height group is then the max width of each X position (starts, ends or interims)
-- A spacer is added between 

Or do I just iterate, 1 by 1 from left to right?
Or do I look for the most interconnected nodes according to betweenness centrality and position them centrally?

If I have the range of how left or right something could be (left is earliest in path, right is latest), there could be a condition to check its successors
and make sure they're more to the right. If not, shift up? Do while not true

In [32]:
# Choose the column whose values define the vertical bands of the layout
#nodedf['Groupby']='All' # Use if you don't want to group
nodedf['Groupby']=nodedf['EPIC_Class'].astype(str) # Or specify the field to group by
# Identify the groups (distinct values of the grouping column)
Grouplist=list(nodedf['Groupby'].unique())

In [33]:

# Define a function to shift x positions right if there is a feeding node. This assumes the presence of the global var "xposDict"
def shift_successors_right(node,G):
    # List a nodes successors
    succ=list(G.successors(node))
    if len(succ)==0:
        return
    else:
        for s in succ:
            if xposDict[s]<=xposDict[node]: # If successor is positionally equal to or left of the dependant node
                xposDict[s]=xposDict[node]+1 # Update that node
                #print('Updating {} and searching for successors'.format(s))
                shift_successors_right(s,G)
    return None


# Every node starts in column 0
xposDict = dict.fromkeys(G.nodes(), 0)

# Visit every node, pushing it and its successors rightwards as needed
for node in G.nodes():
    shift_successors_right(node, G)

# Turn the positions into a (Name, xpos_unadjusted) frame and join it on
xposdf = (
    pd.DataFrame.from_dict(xposDict, orient='index')
    .reset_index()
    .rename(columns={0: 'xpos_unadjusted', 'index': 'Name'})
)
nodedf = nodedf.merge(xposdf, how='inner', on='Name')

# Widest column index in use, and the number of columns to scale by
max_x = nodedf['xpos_unadjusted'].max()
x_scale = max_x + 1

#### Update the x position of unconnected nodes so they span the whole range
# Unconnected nodes would otherwise all sit in column 0. Within each group
# they are dealt out across columns 0..max_x in turn (n modulo max_x+1).
#print(nodedf[nodedf['nodeType']=='unconnected'].xpos_unadjusted.unique())
for Group in Grouplist:
    # Names of this group's unconnected nodes
    unconnected_nodes=nodedf[(nodedf['nodeType']=='unconnected')&(nodedf['Groupby']==Group)]['Name']
    # Column for each of them, cycling through the available columns
    unconnected_pos={node:int(n%(max_x+1)) for n,node in enumerate(unconnected_nodes)}
    unconnected_nodes=pd.DataFrame(unconnected_nodes,columns=['Name'])

    unconnected_nodes['xpos_unadjusted']=unconnected_nodes['Name'].apply(lambda x: unconnected_pos[x])
    # Join it back in; the new values arrive as "xpos_unadjusted_new"
    nodedf=nodedf.merge(unconnected_nodes,how='left',on='Name',suffixes=[None,"_new"])
    # Update the column: take the new value where there is one, else keep the old
    nodedf['xpos_unadjusted'] = np.where(nodedf["xpos_unadjusted_new"].isnull(), nodedf["xpos_unadjusted"], nodedf["xpos_unadjusted_new"] )
    # Drop the added column
    nodedf=nodedf.drop(columns=['xpos_unadjusted_new'])
#print(nodedf[nodedf['nodeType']=='unconnected'].xpos_unadjusted.unique())


# Re-scale to lie between 0 and 1; the +0.5 centres each column, leaving a margin at both edges
nodedf['xpos']=(nodedf['xpos_unadjusted']+0.5)/x_scale 
# Notebook preview
nodedf

In [34]:
# For each group, for each x position, work out how many nodes there will be in the same vertical column
heights_by_group_and_xpos=nodedf[['xpos_unadjusted','Groupby','Name']].groupby(['xpos_unadjusted','Groupby']).count().reset_index()
# For each group, work out what the maximum column height needs to be
# (the 'Name' column now holds that count)
heights_by_group=heights_by_group_and_xpos[['Groupby','Name']].groupby('Groupby').max()
# Add '1' position of spacing between one group's band and the next
heights_by_group['withSpacing']=heights_by_group['Name']+1
# Running total of band heights, so the groups stack one on top of the other
heights_by_group['cutoff']=heights_by_group['withSpacing'].cumsum()
# First and last row reserved for the group: the band ends at 'cutoff' and
# is 'Name' rows tall
heights_by_group['ypos_min']=heights_by_group['cutoff']-heights_by_group['Name']
heights_by_group['ypos_max']=heights_by_group['ypos_min']-1+heights_by_group['Name']

# Merge the min and max y range for the group back in to the main dataframe
nodedf=nodedf.merge(heights_by_group[['ypos_min','ypos_max']].reset_index(),how='inner',on='Groupby')

In [35]:
# Height of the group's band minus one; 0 means the band is a single row
nodedf['ypos_range']=nodedf['ypos_max']-nodedf['ypos_min']


In [36]:
# METHOD 1: STACK NODES WITHIN EACH VERTICAL COLUMN
# (the random placement is commented out below; nodes are instead stacked
# in order within their group and x position)
# Default: every node sits on the first row of its group's band
nodedf['ypos_unadjusted']=nodedf['ypos_min']
# Only groups whose band is more than one row tall need spreading out
ypos=nodedf[nodedf['ypos_range']>0][['Name','ypos_min','ypos_max','Groupby','xpos_unadjusted']]
#ypos['ypos_unadjusted']=np.random.randint(ypos['ypos_min'],ypos['ypos_max'])
# 0-based position of each node within its (group, x position) column
ypos['posingroup']=ypos.groupby(['Groupby','xpos_unadjusted']).cumcount()
# Stack upwards from the bottom of the band
ypos['ypos_unadjusted']=ypos['ypos_min']+ypos['posingroup']



# Join it back in; the new values arrive as "ypos_unadjusted_new"
nodedf=nodedf.merge(ypos[['Name','ypos_unadjusted']],how='left',on='Name',suffixes=[None,"_new"])
# Update the column: take the new value where there is one, else keep the default
nodedf['ypos_unadjusted'] = np.where(nodedf["ypos_unadjusted_new"].isnull(), nodedf["ypos_unadjusted"], nodedf["ypos_unadjusted_new"] )
# Drop the added column
nodedf=nodedf.drop(columns=['ypos_unadjusted_new'])




In [37]:
max_y=nodedf['ypos_unadjusted'].max() # Identify the maximum positional height
y_scale=max_y+1
print(max_y)
# Re-scale to lie between 0 and 1; the +0.5 mirrors the x-axis scaling
nodedf['ypos']=(nodedf['ypos_unadjusted']+0.5)/y_scale 
# Create the usual NetworkX-style pos dict (node name -> [xpos, ypos]). The node trace
# reads its coordinates straight from the df to preserve ordering; pos is what the
# edge arrows use. The first matching row in nodedf is taken for each node.
pos={nodedf[nodedf['Name']==node]['Name'].values[0]:nodedf[nodedf['Name']==node][['xpos', 'ypos']].to_numpy()[0] for node in G.nodes()}

In [40]:

##############################################################################################################
# Define Nodes
##############################################################################################################
# Scatter trace for the nodes. Every per-node property (position, hover
# text, marker symbol, colour) is read from nodedf so they stay aligned
# row by row. Colour shows the card's status; swap in 'Node_Colour_EPIC'
# to colour by EPIC instead.
node_trace = go.Scatter(
    x=nodedf['xpos'],
    y=nodedf['ypos'],
    mode='markers',
    hoverinfo='text',
    hovertext=nodedf['Node_HoverText'],
    text=nodedf['Name'],
    marker_symbol=nodedf['Node_Marker_EPIC'],
    marker=dict(
        color=nodedf['Node_Colour_Status'],
        size=10,
        line_width=0,
    ),
)


##############################################################################################################
# Define Edges
##############################################################################################################

# Create a list of arrows to draw as edges
arrows=[]   
for frm,to in G.edges(): 

    arrow = go.layout.Annotation(dict(x=pos[to][0],y= pos[to][1]
                                      ,xref="x", yref="y",text="",showarrow=True,axref = "x", ayref='y'
                                      ,ax= pos[frm][0],ay= pos[frm][1],
                                      arrowhead = 4,arrowwidth=2,arrowcolor=colourDictHex['Dark Cloud'],))

    arrows.append(arrow)
    


arrows.append(arrows[0]) #For some bizarre reason it leaves off the first one, so I re-add it to the list


##############################################################################################################
# Define Plot layout properties
##############################################################################################################

graph_layout = go.Layout(

                title='<br>BritBox insights team project interdependencies',titlefont_size=16,

                showlegend=False,
width=1000,
    height=1200,
                hovermode='closest',

                #
    margin=dict(b=20,l=5,r=5,t=40),
annotations=arrows,
                xaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
    , yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)')



##############################################################################################################
# Display graph
##############################################################################################################
fig = go.Figure(data=[node_trace],# Draw Nodes

             layout=graph_layout)


#fig.update_layout(annotations= arrows,)


fig.show()



In [41]:
# Save the figure to an HTML file in the working directory
graph_html_path = "project_graphs.html"
fig.write_html(graph_html_path)



In [49]:
import codecs  # import retained; the copy below now uses the built-in open()

# Copy the exported graph HTML into Output.txt.
# Context managers guarantee both handles are closed (the input handle was
# previously never closed), and the source is read in full before Output.txt
# is opened, so a failed read does not leave a truncated output file behind.
# NOTE(review): assumes project_graphs.html is UTF-8 encoded - confirm.
with open("project_graphs.html", "r", encoding="utf-8") as f:
    html_text = f.read()

with open("Output.txt", "w", encoding="utf-8") as text_file:
    text_file.write(html_text)


In [44]:
from bs4 import BeautifulSoup
from urllib.request import urlopen  # import retained; no longer used by this cell

# Parse the exported graph HTML.
# urlopen() requires a URL with a scheme, so passing the bare file name raised
# "ValueError: unknown url type". The file is local, so read its bytes directly.
with open("project_graphs.html", "rb") as html_file:
    html = html_file.read()
soup = BeautifulSoup(html, features="html.parser")


In [30]:
# Method 1B
# Swap pairs of nodes in a vertical and see if the overall avg difference in positions is minimised
# (only the measurement step is implemented below; no swapping happens yet)

# Columns of the per-node report, declared up front so that an empty report
# (no node has any neighbour) still yields a frame with an 'error' column.
report_columns = ['node', 'difference', 'best difference', 'error', 'successors', 'predecessors',
                  'neighbours_positions', 'current pos', 'range begin', 'range end', 'vertical', 'Groupby']

# Identify for all nodes the average distance to their neighbours and how far
# that is from the best average they could possibly achieve
for loopnum in range(0, 1):  # single pass for now
    nList = []
    for node in G.nodes():
        # Row selector for this node, computed once and reused for every lookup below
        is_node = nodedf['Name'] == node

        # Get the y pos of the node in question
        node_pos = nodedf.loc[is_node, 'ypos_unadjusted'].values[0]

        # Get the neighbours of the node
        succ = list(G.successors(node))
        pred = list(G.predecessors(node))
        neighbours = succ + pred

        # Standalone nodes contribute nothing to the report
        if not neighbours:
            continue

        # Best possible |ypos difference| for k neighbours on one side is
        # 0, 1, 1, 2, 2, ... (one level with the node, then alternating above / below)
        best_diff_succ = [np.floor((n + 1) / 2) for n in range(len(succ))]
        best_diff_pred = [np.floor((n + 1) / 2) for n in range(len(pred))]
        best_diff = np.mean(best_diff_succ + best_diff_pred)

        neighbours_positions = nodedf.loc[nodedf['Name'].isin(neighbours), ['ypos_unadjusted']]
        # None of the neighbours appear in nodedf: nothing to measure
        if neighbours_positions.empty:
            continue

        # Held in a separate Series rather than written back onto the .loc slice
        differences = (node_pos - neighbours_positions['ypos_unadjusted']).abs()
        avg_difference = differences.mean()

        # NOTE(review): 'ypos_min_y' / 'ypos_max_y' only exist if the group-range
        # merge ran twice on nodedf (pandas then suffixes the duplicates with
        # _x / _y) - confirm these are the intended columns.
        nList.append({'node': node,
                      'difference': avg_difference,
                      'best difference': best_diff,
                      'error': avg_difference - best_diff,
                      'successors': succ,
                      'predecessors': pred,
                      'neighbours_positions': list(neighbours_positions.loc[:, 'ypos_unadjusted']),
                      'current pos': node_pos,
                      'range begin': nodedf.loc[is_node, 'ypos_min_y'].values[0],
                      'range end': nodedf.loc[is_node, 'ypos_max_y'].values[0],
                      'vertical': nodedf.loc[is_node, 'xpos_unadjusted'].values[0],
                      'Groupby': nodedf.loc[is_node, 'Groupby'].values[0]
                      })

    # Worst-placed nodes first
    newdf = pd.DataFrame(nList, columns=report_columns).sort_values(by='error', ascending=False).reset_index(drop=True)
    print(newdf['error'].mean())
newdf
    # Go through each node
        # then through range from and to
    
    
    # Calculate the overall graph's average distance

# Identify, for each node, the minimum obtainable (e.g. if 5 successors, it would have a minimum avg of (0+1+1+2+2)/5)
# For nodes with largest variance vs. minimum possible, identify ideal positions (within EPIC range)

# See if putting it in one of the other positions would improve things AND reduce the overall graph 
### (if moving to higher number, shift things up, if moving to lower number, shift things down)



### Other y pos algorithm attempts

In [191]:
# Scratch check of the "best possible offset" sequence for eight dummy
# successors: floor((rank + 1) / 2) for rank = 0, 1, 2, ...
succ = list('abcdefgh')
best_pos_succ = [np.floor((rank + 1) / 2) for rank in range(len(succ))]
best_pos_succ

In [140]:
# METHOD 2: Prioritise/Centralise the best-connected nodes & order standalones at the bottom
# (what is computed here is each node's degree, not betweenness centrality)

# Reset the vertical positions so this method starts from a blank slate
nodedf['ypos_unadjusted'] = None

# Degree of every node in the graph; names that are not in the graph score 0
centrality = dict(G.degree())
nodedf['Centrality'] = nodedf['Name'].apply(lambda name: centrality.get(name, 0))


In [None]:
# Within a groupby, pick the node that has the most connected nodes, and put it in the middle of the group

# For that node, loop through its neighbours and base their position on this one (avg of predecessors)

# Repeat for the next most connected node that doesn't have a position already




In [155]:
# For every group, find the x vertical holding the most nodes. That densest
# vertical is the best starting point for dictating the other positions.
vertical_keys = ['xpos_unadjusted', 'Groupby']

# Node count per (vertical, group) combination
df1 = nodedf[vertical_keys + ['Name']].groupby(vertical_keys).count().reset_index()

# Rank the verticals inside each group by node count (ties broken by row order)
df1['Rank'] = df1.groupby('Groupby')['Name'].rank(method="first", ascending=False)

# Keep only the top-ranked vertical of each group
is_densest = df1['Rank'] == 1
starting_verticals = df1.loc[is_densest, vertical_keys]
starting_verticals

In [171]:
# OR, for left most nodes, order by successors in common
# (not implemented yet: for now this only prints the nodes sitting in each
# group's starting vertical)
verticals = list(nodedf['xpos'].unique())

for group in Grouplist:
    # Retrieve the initialisation vertical chosen for this group
    group_rows = starting_verticals['Groupby'] == group
    starting_vertical = starting_verticals.loc[group_rows, 'xpos_unadjusted'].values[0]

    # Names of the nodes of this group that sit in that vertical
    in_vertical = nodedf['xpos_unadjusted'] == starting_vertical
    in_group = nodedf['Groupby'] == group
    mini_nodedf = nodedf.loc[in_vertical & in_group, ['Name']]
    print(mini_nodedf)

In [None]:
# Once initial layer has been done, iterate through layers after it, ordering by average position of predecessors to minimise crossover

In [None]:


# Give every node whose group band spans more than one slot a sequential slot
# within its (Groupby, vertical) cell, counted up from the band's ypos_min.
# (An earlier np.random.randint placement was abandoned.)
# The explicit copy keeps the column assignments below off a slice of nodedf
# (avoids pandas' SettingWithCopyWarning).
has_room = nodedf['ypos_range'] > 0
ypos = nodedf.loc[has_room, ['Name', 'ypos_min', 'ypos_max', 'Groupby', 'xpos_unadjusted']].copy()
ypos['posingroup'] = ypos.groupby(['Groupby', 'xpos_unadjusted']).cumcount()
ypos['ypos_unadjusted'] = ypos['ypos_min'] + ypos['posingroup']
ypos

In [128]:

# Fold the computed slots back into nodedf; the incoming column is suffixed "_new"
slot_updates = ypos[['Name', 'ypos_unadjusted']]
nodedf = nodedf.merge(slot_updates, how='left', on='Name', suffixes=[None, "_new"])

# Take the new slot where one was computed, otherwise keep the existing value
has_new_slot = nodedf["ypos_unadjusted_new"].notnull()
nodedf['ypos_unadjusted'] = np.where(has_new_slot, nodedf["ypos_unadjusted_new"], nodedf["ypos_unadjusted"])

# The helper column has served its purpose
nodedf = nodedf.drop(columns=['ypos_unadjusted_new'])


In [106]:
#nodedf[['Name','EPIC_Class','ypos_unadjusted']].sort_values(by='ypos_unadjusted')