# Load packages

In [6]:
import pandas as pd
import networkx as nx
import numpy as np
import os
# Working directory at start-up; later cells prefix it to image paths to build file:// URLs.
current_path = os.getcwd()

# Visualize data format

In [7]:
# Edge list of the org chart: one row per ('from', 'to') pair.
org_chart = pd.read_csv('org_chart.csv')
# Preview the first five rows.
org_chart.head(5)

Unnamed: 0,from,to
0,Jose,Mateo
1,Jose,Carlos 1
2,Jose,Pablo
3,Jose,Deisy
4,Mateo,Camila


# Organizational chart (ONA 1.0)

In [8]:
from pyvis.network import Network


# Directed graph whose edges run from the 'to' column towards the 'from' column.
G = nx.from_pandas_edgelist(
    org_chart,
    source='to',
    target='from',
    create_using=nx.DiGraph(),
)

# Copy the nodes and edges into an interactive pyvis canvas and render it.
nt = Network(height='700px', width='700px', directed=True, notebook=True)
nt.from_nx(G)
nt.show('org_chart.html')


# Pagerank with bidirectional communication

The most important nodes for the information flow are the mid leaders!!

In [9]:

# Undirected graph: every org-chart edge can be traversed in both directions.
G = nx.from_pandas_edgelist(org_chart, 'to', 'from')
pagerank = nx.pagerank(G, max_iter=500)

# Node size grows with the PageRank score; the raw score is kept as the node title.
for node, score in pagerank.items():
    G.nodes[node]['size'] = (1 + score) ** 20
    G.nodes[node]['title'] = str(score)

nt = Network('500px', '500px', directed=False, notebook=True)
# populates the nodes and edges data structures
nt.from_nx(G)
nt.show('org_chart_pagerank.html')

# Pagerank with top down flow

The most important is the biggest boss

In [10]:
# Directed graph: edges run from the 'to' column towards the 'from' column only.
G = nx.from_pandas_edgelist(org_chart, 'to', 'from', create_using=nx.DiGraph())
pagerank = nx.pagerank(G, max_iter=500)

# Node size grows with the PageRank score; the raw score is kept as the node title.
for node, score in pagerank.items():
    G.nodes[node]['size'] = (1 + score) ** 15
    G.nodes[node]['title'] = str(score)

# NOTE(review): the canvas is undirected although G is a DiGraph, so no arrows
# are drawn - confirm whether directed=True is wanted here.
nt = Network('500px', '500px', directed=False, notebook=True)
# populates the nodes and edges data structures
nt.from_nx(G)
# Dedicated file name so this render does not overwrite the undirected
# PageRank page written to 'org_chart_pagerank.html'.
nt.show('org_chart_pagerank_topdown.html')

# Betweenness centrality in bidirectional networks

In [11]:


# Undirected graph: every org-chart edge can be traversed in both directions.
G = nx.from_pandas_edgelist(org_chart, 'to', 'from')

betweenness_centrality = nx.algorithms.centrality.betweenness_centrality(G, k=None)

# Min-max normalise the scores so node sizes stay between 10 and 10*e
# whatever the raw range of the metric is.
lowest = min(betweenness_centrality.values())
highest = max(betweenness_centrality.values())
span = (highest - lowest) or 1.0  # all scores equal -> avoid dividing by zero
for node, score in betweenness_centrality.items():
    # Size is derived from this cell's betweenness score, not from any
    # PageRank result left over in the kernel.
    G.nodes[node]['size'] = np.exp((score - lowest) / span) * 10
    G.nodes[node]['title'] = str(score)

nt = Network('700px', '700px', directed=False, notebook=True)
# populates the nodes and edges data structures
nt.from_nx(G)
nt.show('org_chart_bc.html')

# With topdown directions

Again... the only bridge would be the big boss

In [12]:



# Directed graph: edges run from the 'to' column towards the 'from' column only.
G = nx.from_pandas_edgelist(org_chart, 'to', 'from', create_using=nx.DiGraph())
ev_centrality = nx.algorithms.centrality.eigenvector_centrality(G, max_iter=500)

# Min-max normalise the scores so node sizes stay between 10 and 10*e
# whatever the raw range of the metric is.
lowest = min(ev_centrality.values())
highest = max(ev_centrality.values())
span = (highest - lowest) or 1.0  # all scores equal -> avoid dividing by zero
for node, score in ev_centrality.items():
    # Size is derived from this cell's eigenvector score, not from any
    # PageRank result left over in the kernel.
    G.nodes[node]['size'] = np.exp((score - lowest) / span) * 10
    G.nodes[node]['title'] = str(score)

nt = Network('700px', '700px', directed=False, notebook=True)
# populates the nodes and edges data structures
nt.from_nx(G)
nt.show('org_chart_evc.html')

# We can learn something!

In hierarchical structures where information flows only towards the top manager, the most important person is that manager. But when information flows both ways, the most important nodes for communication are the mid-level managers: if you want to spread a message, the most effective way is to go through those managers.

# Example using meetings data 

In [13]:
# Meeting counts between pairs of people; 'n_meetings' is renamed to 'value',
# the edge attribute name used for the rest of the notebook.
meetings = pd.read_csv('meetings.csv')
meetings = meetings.rename(columns={'n_meetings':'value'})
print(meetings.head())

# Undirected graph carrying the meeting count on every edge as 'value'.
G = nx.from_pandas_edgelist(meetings, 'to', 'from', ["value"])

# NOTE: nx.pagerank reads the edge attribute named 'weight' by default; the
# edges here only carry 'value', so this ranking does not use meeting counts.
pagerank = nx.pagerank(G, max_iter=10000)
for node, score in pagerank.items():
    G.nodes[node]['size'] = (1 + score) ** 20
    G.nodes[node]['title'] = str(score)

nt = Network('700px', '700px', directed=False)
# populates the nodes and edges data structures
nt.from_nx(G)

# Pass the numeric meeting count (not the (u, v) edge tuple) as the edge
# weight/value so that edge thickness reflects how often two people meet.
for source, target, n_meetings in G.edges(data='value'):
    nt.add_edge(source, target, weight=n_meetings, value=n_meetings)

nt.force_atlas_2based()
# Dedicated file name: 'meeting_pagerank_image.html' is written by the cell
# that adds profile images, and sharing it would overwrite this render.
nt.show('meeting_pagerank.html')


    from        to  value
0   Jose     Mateo     10
1   Jose  Carlos 1      3
2   Jose     Pablo     15
3   Jose     Deisy      2
4  Mateo    Camila     10


# Create a final visual

- Let us understand which are the stronger connections
- Put the profile image (if available)
- Set the size of the node according to the pagerank

In [14]:
# People directory; the loop below reads its 'Name' and 'image' columns.
directory = pd.read_csv('directory.csv')

# Undirected graph carrying the meeting count on every edge as 'value'.
G = nx.from_pandas_edgelist(meetings, 'to', 'from', ["value"])

# NOTE: nx.pagerank reads the edge attribute named 'weight' by default; the
# edges here only carry 'value', so this ranking does not use meeting counts.
pagerank = nx.pagerank(G, max_iter=10000)

# Min-max normalise the scores so node sizes stay between 10 and 10*e.
lowest = min(pagerank.values())
highest = max(pagerank.values())
span = (highest - lowest) or 1.0  # all scores equal -> avoid dividing by zero
for node, score in pagerank.items():
    G.nodes[node]['size'] = np.exp((score - lowest) / span) * 10
    G.nodes[node]['title'] = str(score)

    # Swap the default marker for the person's picture when the directory has
    # a row for this node and that row's 'image' is not null.
    node_image = directory.loc[directory['Name'] == node, 'image']
    if not node_image.empty and pd.notna(node_image.iloc[0]):
        G.nodes[node]['shape'] = 'image'
        G.nodes[node]['image'] = f'file://{current_path}/{node_image.iloc[0]}'


nt = Network('700px', '700px', directed=False)
# populates the nodes and edges data structures
nt.from_nx(G)

# Pass the numeric meeting count (not the (u, v) edge tuple) as the edge
# weight/value so that edge thickness reflects how often two people meet.
for source, target, n_meetings in G.edges(data='value'):
    nt.add_edge(source, target, weight=n_meetings, value=n_meetings)

nt.force_atlas_2based()
nt.show('meeting_pagerank_image.html')

# Wrapping the whole code

In [15]:
from networkx import pagerank
from networkx.algorithms.centrality import eigenvector_centrality,betweenness_centrality


def set_title(df,value, metric):
    """Return the title text for a node.

    The text is the ``Name`` of the first row of ``df``, the separator
    ``' <br> '``, then ``metric`` and ``value`` separated by a space.
    """
    first_name = df.Name.iloc[0]
    metric_part = f'{metric} {value}'
    return first_name + ' <br> ' + metric_part

def build_ona(nodes,interactions,metric, size_function = lambda x: (x+1)**15):
    """Build a pyvis network whose node sizes follow a centrality metric.

    Parameters
    ----------
    nodes : pandas.DataFrame
        Must contain the columns ``Name`` and ``image``. A non-null ``image``
        is appended to ``current_path`` to form the node's picture URL.
    interactions : pandas.DataFrame
        Edge list with the columns ``from``, ``to`` and ``value``.
    metric : {'pagerank', 'ev_centrality', 'bt_centrality'}
        Centrality used to score the nodes; ``value`` is passed as the edge
        weight to all three.
    size_function : callable, optional
        Maps a node's score to its drawn size.

    Returns
    -------
    pyvis.network.Network
        Undirected network with the force-atlas-2 layout enabled; call
        ``.show(path)`` on it to render.

    Raises
    ------
    AssertionError
        If a required column is missing or ``metric`` is not supported.
    """
    assert 'Name' in nodes.columns, "nodes needs a 'Name' column"
    assert 'image' in nodes.columns, "nodes needs an 'image' column"
    assert 'from' in interactions.columns, "interactions needs a 'from' column"
    assert 'to' in interactions.columns, "interactions needs a 'to' column"
    assert 'value' in interactions.columns, "interactions needs a 'value' column"
    assert metric in ['pagerank', 'ev_centrality', 'bt_centrality'], \
        f'unsupported metric: {metric!r}'

    print(interactions.head())
    G = nx.from_pandas_edgelist(interactions, 'to', 'from', ["value"])

    # The algorithms are reached through `nx` so the function keeps working
    # when a notebook cell rebinds a bare name such as `pagerank` to a dict.
    if metric == 'pagerank':
        sizes = nx.pagerank(G, max_iter=10000, weight='value')
    elif metric == 'ev_centrality':
        sizes = nx.eigenvector_centrality(G, max_iter=500, weight='value')
    else:
        # NOTE: networkx uses the weight as a path length for betweenness, so
        # a larger 'value' makes two nodes count as further apart.
        sizes = nx.betweenness_centrality(G, weight='value')
    print(G)

    for node, score in sizes.items():
        # size_function turns the raw score into a drawn size; try different
        # functions to get the desired plot.
        G.nodes[node]['size'] = size_function(score)

        node_rows = nodes[nodes['Name'] == node]
        if node_rows.empty:
            # No row for this node in `nodes`: label it with its graph id and
            # keep the default marker.
            G.nodes[node]['title'] = f'{node} <br> {metric} {score}'
            continue

        # Title shown for the node in the rendered page.
        G.nodes[node]['title'] = set_title(node_rows, score, metric)

        node_image = node_rows['image'].iloc[0]
        if pd.notna(node_image):
            # An image is available: draw it instead of the standard marker.
            G.nodes[node]['shape'] = 'image'
            G.nodes[node]['image'] = f'file://{current_path}/{node_image}'

    nt = Network('700px', '700px', directed=False)
    nt.from_nx(G)

    # Pass the numeric 'value' (not the (u, v) edge tuple) as the edge
    # weight, drawn width and hover title.
    for source, target, weight in G.edges(data='value'):
        nt.add_edge(source, target, weight=weight, value=weight, title=str(weight))

    nt.force_atlas_2based()

    return nt

# Meetings network sized by PageRank; sizes are (score + 0.1) * 100.
ona = build_ona(
    nodes=directory,
    interactions=meetings,
    metric='pagerank',
    size_function=lambda score: (score + 0.1) * 100,
)
ona.show('final_ona.html')

# Now... what can we do?

First of all we can see our organization with a simple graph.

But we can also check for some amazing correlations such as:

- PageRank and promotion rate
- PageRank and Attrition rate

Among others

# Let's see some NA with more "realistic" data

In [35]:
from pyvis.network import Network
import pandas as pd

# Edge list from got_edges.csv, renamed to the from/to/value columns that
# build_ona asserts on. An optional filter such as edges.query('value>10')
# can be applied here to keep only the heavier edges.
edges = pd.read_csv('got_edges.csv')[['Source', 'Target', 'weight']].rename(
    columns={'Source': 'from', 'Target': 'to', 'weight': 'value'}
)

# Node table keyed by 'Name'; the 'image' column is filled with NaN, so
# build_ona keeps the default marker for every node.
got_nodes = (
    pd.read_csv('got_nodes.csv')
    .rename(columns={'Id': 'Name'})
    .assign(image=np.nan)
)

got = build_ona(
    nodes=got_nodes,
    interactions=edges,
    metric='ev_centrality',
    size_function=lambda score: (score + 0.1) * 100,
)
got.show('final_got.html')

             from                to  value
0  Addam-Marbrand     Brynden-Tully      3
1  Addam-Marbrand  Cersei-Lannister      3
2  Addam-Marbrand       Gyles-Rosby      3
3  Addam-Marbrand   Jaime-Lannister     14
4  Addam-Marbrand      Jalabhar-Xho      3
Graph with 796 nodes and 2823 edges


In [None]:
# Sum of the 'value' column of `edges`, the edge frame built from got_edges.csv.
edges['value'].sum()