# Load packages

In [1]:
import pandas as pd
import networkx as nx
import numpy as np
import os
current_path = os.getcwd()

# Visualize data format

In [2]:
# Read the org-chart edge list and preview its first five rows.
org_chart = pd.read_csv(filepath_or_buffer='org_chart.csv')
org_chart.head(n=5)

Unnamed: 0,from,to
0,Jose,Mateo
1,Jose,Carlos 1
2,Jose,Pablo
3,Jose,Deisy
4,Mateo,Camila


# Organizational chart (ONA 1.0)

In [27]:
from pyvis.network import Network

# One undirected edge per row of `org_chart`, joining its 'to' and 'from' columns.
# The canvas below is created with directed=False, so an undirected graph is all
# that is needed here.
G = nx.from_pandas_edgelist(org_chart, 'to', 'from')

# 700px x 700px canvas; notebook=True asks pyvis for notebook-embedded output.
nt = Network('700px', '700px', directed=False, notebook=True)
# populates the nodes and edges data structures
nt.from_nx(G)
nt.show('org_chart.html')

In [29]:
!pip install pydot

Collecting pydot
  Downloading pydot-1.4.2-py2.py3-none-any.whl (21 kB)
Installing collected packages: pydot
Successfully installed pydot-1.4.2



# PageRank with bidirectional communication

The most important nodes for the information flow are the mid-level leaders!

In [20]:

# Undirected graph: every reporting line can carry information in both directions.
G = nx.from_pandas_edgelist(org_chart, 'to', 'from')
pagerank = nx.pagerank(G, max_iter=500)

for name, score in pagerank.items():
    # Node size grows with the score; the exponent stretches the small
    # differences between PageRank values so they are visible.
    G.nodes[name]['size'] = (1 + score) ** 20
    # Keep the raw score as the node's 'title' attribute.
    G.nodes[name]['title'] = str(score)

nt = Network('500px', '500px', directed=False, notebook=True)
# populates the nodes and edges data structures
nt.from_nx(G)
nt.show('org_chart_pagerank.html')

# Pagerank with top down flow

The most important node is the top boss.

In [22]:
# Directed graph: each row of `org_chart` becomes an edge whose source is the
# 'to' column and whose target is the 'from' column.
G = nx.from_pandas_edgelist(org_chart, 'to', 'from', create_using=nx.DiGraph())
pagerank = nx.pagerank(G, max_iter=500)

for name, score in pagerank.items():
    # Node size grows with the score; the exponent stretches small differences.
    G.nodes[name]['size'] = (1 + score) ** 15
    # Keep the raw score as the node's 'title' attribute.
    G.nodes[name]['title'] = str(score)

# NOTE(review): the canvas is undirected although G is a DiGraph, so no arrows
# are drawn - confirm this is intended.
nt = Network('500px', '500px', directed=False, notebook=True)
# populates the nodes and edges data structures
nt.from_nx(G)
# Saved under its own name so it does not overwrite the bidirectional chart
# written to 'org_chart_pagerank.html'.
nt.show('org_chart_pagerank_topdown.html')

# Betweenness centrality in bidirectional networks

In [24]:


# Undirected graph built from the 'to' and 'from' columns of `org_chart`.
G = nx.from_pandas_edgelist(org_chart, 'to', 'from')

betweenness_centrality = nx.algorithms.centrality.betweenness_centrality(G, k=None)

# Min-max scale the scores to [0, 1], then map them to sizes between 10 and
# roughly 27 (10 * e**0 .. 10 * e**1). The size must come from the betweenness
# scores themselves, not from the PageRank dict left over by an earlier cell.
a = min(betweenness_centrality.values())
b = max(betweenness_centrality.values())
spread = b - a
for name, score in betweenness_centrality.items():
    # When every node has the same score there is nothing to scale (and no
    # division by zero): all nodes get the base size.
    scaled = (score - a) / spread if spread else 0.0
    G.nodes[name]['size'] = np.exp(scaled) * 10
    G.nodes[name]['title'] = str(score)

nt = Network('700px', '700px', directed=False, notebook=True)
# populates the nodes and edges data structures
nt.from_nx(G)
nt.show('org_chart_bc.html')

# With topdown directions

Again... the only bridge would be the big boss

In [26]:



# Directed graph: source is the 'to' column, target is the 'from' column.
G = nx.from_pandas_edgelist(org_chart, 'to', 'from', create_using=nx.DiGraph())
ev_centrality = nx.algorithms.centrality.eigenvector_centrality(G, max_iter=500)

# Min-max scale the scores to [0, 1], then map them to sizes between 10 and
# roughly 27 (10 * e**0 .. 10 * e**1). The size must come from the eigenvector
# scores themselves, not from the PageRank dict left over by an earlier cell.
a = min(ev_centrality.values())
b = max(ev_centrality.values())
spread = b - a
for name, score in ev_centrality.items():
    # Equal scores everywhere: nothing to scale, and no division by zero.
    scaled = (score - a) / spread if spread else 0.0
    G.nodes[name]['size'] = np.exp(scaled) * 10
    G.nodes[name]['title'] = str(score)

nt = Network('700px', '700px', directed=False, notebook=True)
# populates the nodes and edges data structures
nt.from_nx(G)
nt.show('org_chart_evc.html')

# We can learn something!

In hierarchical structures where information flows towards the top manager, the most important person is that manager. But when information flows both ways, the most important nodes for communication are the mid-level managers: if you want to spread a message, the most effective way is to go through them.

# Example using meetings data (and how to add images)

In [66]:
meetings = pd.read_csv('meetings.csv')
# Expose the meeting count as 'value', the edge attribute name used from here on.
meetings = meetings.rename(columns={'n_meetings': 'value'})
print(meetings.head())

# Undirected graph; each edge keeps its 'value' attribute.
G = nx.from_pandas_edgelist(meetings, 'to', 'from', ["value"])

# NOTE(review): nx.pagerank reads the 'weight' edge attribute by default, so
# the 'value' counts do not influence this score. build_ona further down
# passes weight='value' - confirm which behaviour is wanted here.
pagerank = nx.pagerank(G, max_iter=10000)
for name, score in pagerank.items():
    # Node size grows with the score; the exponent stretches small differences.
    G.nodes[name]['size'] = (1 + score) ** 20
    G.nodes[name]['title'] = str(score)

nt = Network('700px', '700px', directed=False)
# populates the nodes and edges data structures; edge attributes such as
# 'value' travel with the edges, so no separate add_edge pass is needed
nt.from_nx(G)

nt.force_atlas_2based()
nt.show('meeting_pagerank_image.html')


    from        to  value
0   Jose     Mateo     10
1   Jose  Carlos 1      3
2   Jose     Pablo     15
3   Jose     Deisy      2
4  Mateo    Camila     10


# Create a final visual

- Let us understand which are the stronger connections
- Put the profile image (if available)
- Set the size of the node according to the pagerank

In [62]:
directory = pd.read_csv('directory.csv')
# A bare expression in the middle of a cell is not displayed, so print the preview.
print(directory.head())

# Undirected graph; each edge keeps its 'value' attribute.
G = nx.from_pandas_edgelist(meetings, 'to', 'from', ["value"])

pagerank = nx.pagerank(G, max_iter=10000)

# Min-max scale the scores to [0, 1], then map them to sizes between 10 and
# roughly 27 (10 * e**0 .. 10 * e**1).
a = min(pagerank.values())
b = max(pagerank.values())
spread = b - a
for name, score in pagerank.items():
    # Equal scores everywhere: nothing to scale, and no division by zero.
    scaled = (score - a) / spread if spread else 0.0
    G.nodes[name]['size'] = np.exp(scaled) * 10
    G.nodes[name]['title'] = str(score)

    node_image = directory.image[directory['Name'] == name]
    # People missing from the directory, or listed without a picture, keep
    # the default node shape.
    if node_image.empty or node_image.isnull().iloc[0]:
        continue
    G.nodes[name]['shape'] = 'image'
    G.nodes[name]['image'] = f'file://{current_path}/{node_image.iloc[0]}'

nt = Network('700px', '700px', directed=False)
# populates the nodes and edges data structures; edge attributes such as
# 'value' travel with the edges, so no separate add_edge pass is needed
nt.from_nx(G)

nt.force_atlas_2based()
nt.show('meeting_pagerank_image.html')

# Wrapping the whole code

In [99]:
def set_title(df, value, metric):
    """Return '<name> <br> <metric> <value>' for the first row of `df`.

    df: frame with a 'Name' column; only the first row is read.
    value: score printed after the metric label.
    metric: label printed before the score.
    """
    first_name = df['Name'].iloc[0]
    score_text = f'{metric} {value}'
    return first_name + ' <br> ' + score_text

def build_ona(nodes,interactions,metric, size_function = lambda x: (x+1)**15):
    """Build a pyvis network of `interactions`, sizing each node by a centrality score.

    Parameters
    ----------
    nodes : DataFrame with a 'Name' column and an 'image' column. A non-null
        'image' is appended to `current_path` to form the node picture URL.
    interactions : DataFrame with 'from', 'to' and 'value' columns, one row per edge.
    metric : one of 'pagerank', 'ev_centrality', 'bt_centrality'.
    size_function : maps a node's score to its size attribute.

    Returns
    -------
    The pyvis Network, with the force-atlas layout enabled.

    Raises
    ------
    AssertionError
        If a required column is missing or `metric` is not one of the three names.
    """
    assert 'Name' in nodes.columns
    assert 'image' in nodes.columns
    assert 'from' in interactions.columns
    assert 'to' in interactions.columns
    assert 'value' in interactions.columns
    assert metric in ['pagerank', 'ev_centrality', 'bt_centrality']

    G = nx.from_pandas_edgelist(interactions, 'to', 'from', ["value"])

    if metric == 'pagerank':
        sizes = nx.pagerank(G, max_iter=10000, weight='value')
    elif metric == 'ev_centrality':
        sizes = nx.algorithms.centrality.eigenvector_centrality(G, max_iter=500, weight='value')
    else:
        # NOTE(review): networkx documents the betweenness weight as a distance,
        # so a larger 'value' makes two people *farther* apart here - confirm
        # that this is the intended reading of the data.
        sizes = nx.algorithms.centrality.betweenness_centrality(G, weight='value')

    for name, score in sizes.items():
        G.nodes[name]['size'] = size_function(score)

        # Look the person up in the `nodes` argument (not in a notebook global).
        person = nodes[nodes['Name'] == name]
        if person.empty:
            # Present in `interactions` but not listed in `nodes`: same title
            # layout as set_title, default shape, no picture.
            G.nodes[name]['title'] = f'{name} <br> {metric} {score}'
            continue

        G.nodes[name]['title'] = set_title(person, score, metric)

        node_image = person['image']
        if not node_image.isnull().iloc[0]:
            G.nodes[name]['shape'] = 'image'
            G.nodes[name]['image'] = f'file://{current_path}/{node_image.iloc[0]}'

    # Give every edge a 'title' holding its 'value'. This is set on the graph
    # itself so that from_nx carries it over together with 'value'.
    for _, _, attrs in G.edges(data=True):
        attrs['title'] = str(attrs['value'])

    nt = Network('700px', '700px', directed=False)
    nt.from_nx(G)
    nt.force_atlas_2based()

    return nt

# Eigenvector-centrality view of the meetings network, using an exponent of 10
# for the node sizes instead of build_ona's default of 15.
ona = build_ona(
    nodes=directory,
    interactions=meetings,
    metric='ev_centrality',
    size_function=lambda score: (score + 1) ** 10,
)

ona.show('final_ona.html')

# Now... what can we do?

First of all we can see our organization with a simple graph.

But we can also check for some amazing correlations such as:

- PageRank and promotion rate
- PageRank and Attrition rate

Among others

In [108]:
# Preview the first five meeting records.
meetings.head(n=5)

Unnamed: 0,from,to,value
0,Jose,Mateo,10
1,Jose,Carlos 1,3
2,Jose,Pablo,15
3,Jose,Deisy,2
4,Mateo,Camila,10


In [111]:
from pyvis.network import Network
import pandas as pd

got_net = Network(height='750px', width='100%', bgcolor='#222222', font_color='white')

# set the physics layout of the network
got_net.barnes_hut()

# One edge per meeting record: each person is added as a node labelled with
# their own name, and the edge carries the record's 'value'.
for src, dst, w in zip(meetings['from'], meetings['to'], meetings['value']):
    got_net.add_node(src, src, title=src)
    got_net.add_node(dst, dst, title=dst)
    got_net.add_edge(src, dst, value=w)

neighbor_map = got_net.get_adj_list()

# add neighbor data to node hover data
for node in got_net.nodes:
    neighbors = neighbor_map[node['id']]
    node['title'] += ' Neighbors:<br>' + '<br>'.join(neighbors)
    # the node's 'value' is its number of neighbours
    node['value'] = len(neighbors)

got_net.show('gameofthrones.html')