# TOP INFLUENCERS IN A GRAPH 
# ACCORDING TO VARIOUS CENTRALITY INDICES

## By Moses A. Boudourides

In [1]:
%matplotlib inline 
# %load_ext autoreload

import networkx as nx
import pandas as pd
import numpy as np
# import seaborn as sns
from scipy import stats
import os
import imp
import random
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rc('text', usetex = False)
mpl.rc('font', family = 'serif')
import math
import pylab

## Generation of a (strongly) connected random graph G
### In the first three lines below, set the number of nodes, edges and True/False for directed/undirected graph

In [2]:
number_of_nodes = 100
number_of_edges = 1000
directed_graph = True

while True:
    G=nx.gnm_random_graph(number_of_nodes,number_of_edges,directed=directed_graph) 
    if nx.is_directed(G) == True:
        zz=[]
        if nx.number_strongly_connected_components(G)==1:
            break        
    else:
        zz=[]
        for i in G.nodes():
            if G.degree(i)==0:
                zz.append(i)  
        if nx.number_connected_components(G)==1:
            break
# G=nx.random_regular_graph(3,10)            
# print zz
# nx.draw_networkx(G)
# plt.axis("off");
# nx.write_gpickle(G,'test.pic')
if nx.is_directed(G) == True:
    print 'G is a strongly connected gnm random directed graph with %i nodes and %i edges' %(len(G.nodes()), len(G.edges()))
else:
    print 'G is a connected gnm random undirected graph with %i nodes and %i edges' %(len(G.nodes()), len(G.edges()))

G is a strongly connected gnm random directed graph with 100 nodes and 1000 edges


## Random assignment of a scalar attribute on edges of G
### In the first line below set an integer as the maximum value of the attribute

In [3]:
maxw=100

weis=range(1,maxw+1)
from_list=[]
to_list=[]
weights_list = []
for ed in G.edges():
    wei=random.choice(weis)
    G.add_edge(ed[0],ed[1],weight=wei)
    from_list.append(ed[0])
    to_list.append(ed[1])
    weights_list.append(wei)
edge_pand=pd.DataFrame({'from node':from_list, 'to node':to_list,'edge weight':weights_list})

edge_pand=edge_pand[['from node','to node','edge weight']]
edge_pand.head()

Unnamed: 0,from node,to node,edge weight
0,0,3,36
1,0,17,42
2,0,82,47
3,0,52,50
4,0,87,50


## Various centrality indices on nodes of G

In [4]:
def create_centralities_list(G,maxiter=2000,pphi=5,centList=[]):
    if len(centList)==0:
        centList=['degree','closeness','betweenness','eigenvector','katz','page_rank','hits']
    cenLen=len(centList)
    valus={}
    # plt.figure(figsize=figsi)
    for uu,centr in enumerate(centList):
        if centr=='degree':
            if isinstance(G,nx.DiGraph):
                cent=nx.in_degree_centrality(G)
                sstt='In Degree Centralities '
                valus['in_degree']=cent
                cent=nx.out_degree_centrality(G)
                sstt+= 'and Out Degree Centralities'
                valus['out_degree']=cent
            else:
                cent=nx.degree_centrality(G)
                sstt='Degree Centralities'
                ssttt='degree centrality'
                valus[centr]=cent
        elif centr=='closeness':
            cent=nx.closeness_centrality(G)
            sstt='Closeness Centralities'
            ssttt='closeness centrality'
            valus[centr]=cent

        elif centr=='betweenness':
            cent=nx.betweenness_centrality(G)
            sstt='Betweenness Centralities'
            ssttt='betweenness centrality'
            valus[centr]=cent

        elif centr=='eigenvector':
            try:
                cent=nx.eigenvector_centrality(G,max_iter=maxiter)
                sstt='Eigenvector Centralities'
                ssttt='eigenvector centrality'
                valus[centr]=cent

            except:
                valus[centr]=None

                continue
        elif centr=='katz':
            phi = (1+math.sqrt(pphi))/2.0 # largest eigenvalue of adj matrix
            cent=nx.katz_centrality_numpy(G,1/phi-0.01)
            sstt='Katz Centralities'
            ssttt='Katz centrality'
            
            valus[centr+'_%i' %pphi]=cent

        elif centr=='page_rank':
            try:
                cent=nx.pagerank(G)
                sstt='PageRank'
                ssttt='pagerank'
                valus[centr]=cent

            except:
                valus[centr]=None

                continue
        elif centr=='hits':
            if isinstance(G,nx.DiGraph):
                dd=nx.hits(G,max_iter=maxiter)
                sstt='Hits hubs '
                valus['hits_hubs']=dd[0]
                sstt+= 'and Hits authorities'
                valus['hits_auths']=dd[1]
            else:
                dd=nx.hits(G,max_iter=maxiter)
                cent=nx.degree_centrality(G)
                sstt='Hits'
                ssttt='hits centrality'
                valus[centr]=dd[0]
        print '%s done!!!' %sstt
    return valus

In [5]:
central_pd=pd.DataFrame(create_centralities_list(G))

In Degree Centralities and Out Degree Centralities done!!!
Closeness Centralities done!!!
Betweenness Centralities done!!!
Eigenvector Centralities done!!!
Katz Centralities done!!!
PageRank done!!!
Hits hubs and Hits authorities done!!!


In [6]:
dindices=['out_degree','in_degree','closeness','betweenness','eigenvector','hits_hubs','hits_auths','katz_5','page_rank']
indices=['degree','closeness','betweenness','eigenvector','hits','katz_5','page_rank']
# indices=['degree','closeness','betweenness','eigenvector','hits_hubs','hits_auths','katz_5','page_rank']
if isinstance(G,nx.DiGraph):
    central_pd=central_pd[dindices]
else:
    central_pd=central_pd[indices]
central_pd

Unnamed: 0,out_degree,in_degree,closeness,betweenness,eigenvector,hits_hubs,hits_auths,katz_5,page_rank
0,0.060606,0.060606,0.400810,0.003903,0.097618,0.005780,0.007446,0.111884,0.010584
1,0.101010,0.141414,0.443946,0.015019,0.133465,0.009873,0.016202,0.062801,0.013230
2,0.080808,0.090909,0.432314,0.009456,0.081756,0.008713,0.013826,0.086786,0.007320
3,0.111111,0.090909,0.462617,0.012492,0.090642,0.011658,0.011019,0.243049,0.010409
4,0.111111,0.141414,0.458333,0.015278,0.135445,0.010748,0.013693,0.061856,0.014392
5,0.070707,0.101010,0.428571,0.008254,0.096065,0.003222,0.011086,0.084622,0.008942
6,0.101010,0.111111,0.456221,0.013404,0.081784,0.006456,0.009607,-0.187644,0.007853
7,0.111111,0.060606,0.471429,0.009864,0.041187,0.013879,0.002931,0.011091,0.005719
8,0.121212,0.080808,0.469194,0.010495,0.076676,0.014582,0.009322,-0.065631,0.007826
9,0.090909,0.050505,0.454128,0.006402,0.044442,0.006122,0.003252,-0.035102,0.004999


In [7]:
def picker(G,cols,thres):
    for al in range(len(G.nodes())):
        degs_dic={}
        deggs=None
        for col in colss:
            degress=central_pd.sort_values(col,ascending=False).head(al).to_dict()
            if deggs is None:
                deggs=set(degress[col].keys())
            else:
                deggs=deggs.intersection(set(degress[col].keys()))
            degs_dic[col]=degress[col].keys()
        if len(deggs)>=thres:
            break
    print 'The %i Influencer nodes belonging to all of the top centralities up to rank %i are:' %(thres,al)
    print list(deggs)
    print 
    for i in deggs:
        ss='Influencer node %s is ranked: \n' %i  #has ranking index
        for nn,kk in degs_dic.items():
            ss+='%i-th wrt %s\n' %(kk.index(i)+1,nn)
        print ss
    return al,deggs

## Detecting Top Influencers wrt (any number of the previous) centrality indices
### In the first lines of the next cell, select:
* ### the desired centrality indices over which the influencers should be defined (set "colss=dindices" for all centralities when the graph is directed or "colss=indices" for all centralities when the graph is undirected or remove any number of them)
* ### the desired number of influencers (thres) who belong to all of the top centralities up to a minimal rank (which is going to be returned by running the previous function)

In [8]:
# colss=['degree','closeness','betweenness','eigenvector','hits_hubs','hits_auths','katz_5','page_rank']
# colss=['degree','closeness','betweenness',]
colss=dindices
thres=10

al,deggs = picker(G,colss,thres) #,thres=1)
central_pd.iloc[list(deggs)]

The 10 Influencer nodes belonging to all of the top centralities up to rank 44 are:
[66, 36, 78, 19, 52, 87, 4, 28, 94, 31]

Influencer node 66 is ranked: 
31-th wrt hits_hubs
32-th wrt out_degree
31-th wrt in_degree
31-th wrt closeness
32-th wrt page_rank
28-th wrt betweenness
27-th wrt katz_5
30-th wrt hits_auths
30-th wrt eigenvector

Influencer node 36 is ranked: 
18-th wrt hits_hubs
20-th wrt out_degree
14-th wrt in_degree
18-th wrt closeness
17-th wrt page_rank
17-th wrt betweenness
17-th wrt katz_5
16-th wrt hits_auths
15-th wrt eigenvector

Influencer node 78 is ranked: 
37-th wrt hits_hubs
37-th wrt out_degree
37-th wrt in_degree
37-th wrt closeness
36-th wrt page_rank
35-th wrt betweenness
33-th wrt katz_5
35-th wrt hits_auths
35-th wrt eigenvector

Influencer node 19 is ranked: 
7-th wrt hits_hubs
9-th wrt out_degree
6-th wrt in_degree
8-th wrt closeness
7-th wrt page_rank
7-th wrt betweenness
10-th wrt katz_5
9-th wrt hits_auths
7-th wrt eigenvector

Influencer node 52 is r

Unnamed: 0,out_degree,in_degree,closeness,betweenness,eigenvector,hits_hubs,hits_auths,katz_5,page_rank
66,0.131313,0.131313,0.475962,0.016022,0.13949,0.013439,0.019616,0.196573,0.012704
36,0.121212,0.20202,0.473684,0.034888,0.163494,0.012715,0.019275,0.094239,0.018185
78,0.121212,0.131313,0.469194,0.02064,0.14584,0.011148,0.015557,0.098001,0.01457
19,0.121212,0.161616,0.471429,0.023601,0.159701,0.010415,0.015844,0.069767,0.014662
52,0.121212,0.121212,0.473684,0.014766,0.125634,0.011704,0.016704,0.028925,0.012193
87,0.111111,0.111111,0.487685,0.016054,0.099986,0.013741,0.011645,0.068349,0.01104
4,0.111111,0.141414,0.458333,0.015278,0.135445,0.010748,0.013693,0.061856,0.014392
28,0.151515,0.151515,0.492537,0.027827,0.118238,0.014446,0.014759,0.085795,0.012269
94,0.171717,0.151515,0.515625,0.030772,0.169284,0.023797,0.021795,0.038298,0.014873
31,0.131313,0.121212,0.482927,0.019037,0.116871,0.012069,0.010622,0.030724,0.013455


## Detecting the Spheres of Influence that Top Influencers have 
### In the first line of the next cell, select the radius of influence (ri)

In [9]:
ri = 3

for x in deggs:
    ss='The radius %i order Sphere of Influence of Influencer node %s is: \n' %(ri,x)  
    ni = nx.single_source_shortest_path_length(G, x, cutoff=ri).keys()
    print ss, ni
    print
niu = list(set.union(*[set(nx.single_source_shortest_path_length(G, x, cutoff=ri).keys()) for x in deggs]))
print 'The set of nodes influenced by all Influencers at radius %i is:' %ri
print niu

The radius 3 order Sphere of Influence of Influencer node 66 is: 
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]

The radius 3 order Sphere of Influence of Influencer node 36 is: 
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]

The radius 3 order Sphere of Influence of Influencer node 78 is: 
[0, 1, 2, 3, 4, 5,