In [1]:
import igraph 
import networkx as nx

import sys
sys.path.append('../my_modules/')
from girwan_newman_benchmark import create_GN_benchmark_graph,fraction_of_vertices_correctly_classified

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

### Define functions for testing

In [2]:
def nx_2_ig(nx_g):
    '''
    Convert a networkx graph into an igraph graph.

    Background (original author's note): the work started in networkx, but
    igraph is used for the community detection because it has the fastgreedy
    algorithm implemented and is also supposed to be faster.

    Parameters
    ----------
    nx_g : object exposing nodes() and edges() (e.g. a networkx graph)
        Graph to convert. Any number of nodes is supported.

    Returns
    -------
    ig_g : igraph.Graph
        New graph; vertex i carries the i-th node of nx_g.nodes() in its
        "name" attribute, and there is one edge per entry of nx_g.edges().
    id_name : dict
        Maps igraph vertex id -> original node.
    name_id : dict
        Maps original node -> igraph vertex id.
    '''
    ig_g=igraph.Graph()

    # Edges are added by integer vertex id rather than by name, so an
    # explicit name <-> id mapping is maintained.
    # enumerate() numbers every node; the previous zip(..., range(128))
    # silently dropped all nodes past the 128th and the edge lookup below
    # then failed with a KeyError.
    id_name,name_id=dict(),dict()
    for i,node in enumerate(nx_g.nodes()):
        ig_g.add_vertex(name=node)
        id_name[i],name_id[node]=node,i

    # Translate the end points to vertex ids and add all edges in a single
    # call instead of one add_edge() call per edge.
    ig_g.add_edges([(name_id[edge[0]],name_id[edge[1]]) for edge in nx_g.edges()])

    return ig_g,id_name,name_id

def test_igraph_method(k_in,igraph_method):
    '''
    function to test an igraph community detection method, on a random GN benchmark graph
    '''
    graph = create_GN_benchmark_graph(k_in=k_in)
    ig_g,id_name,name_id = nx_2_ig(graph)
    return fraction_of_vertices_correctly_classified(igraph_method(ig_g).as_clustering(4).membership)

def repeat_test_igraph_method(k_in,igraph_method,M):
    '''
    Run test_igraph_method M times with the same k_in and igraph_method.

    Returns the list of the M individual results, in run order. No summary
    statistic is computed here; callers aggregate the raw values themselves.
        - original author's note: interquartile distances are preferable to
        the variance as a spread measure because the distribution of the
        results is highly asymmetric
    '''
    # range() instead of the Python-2-only xrange(): works on both Python 2
    # and Python 3 (xrange raises NameError on Python 3).
    corr_fracs=[test_igraph_method(k_in,igraph_method) for _ in range(M)]
    return corr_fracs

def scan_k_in_vals(igraph_method,N,M,k_in_min=8,k_in_max=16):
    '''
    Run repeat_test_igraph_method for a whole range of k_in values.

    N evenly spaced k_in values are taken from k_in_min to k_in_max (both
    end points included) and each one is tested M times.

    Returns (res, k_in_range): res is a numpy array built from one row of
    results per k_in value, k_in_range is the array of k_in values used.
    '''
    k_in_range = np.linspace(k_in_min, k_in_max, N)

    # one row of M repetition results per scanned k_in value
    rows = []
    for k_in in k_in_range:
        rows.append(repeat_test_igraph_method(k_in, igraph_method, M))

    return np.array(rows), k_in_range

### Run test

In [None]:
# Scan resolution: scan_k_in_vals samples N evenly spaced k_in values over
# its default range 8..16 (a step of 0.25 for N=33) and runs M independent
# tests at each value, i.e. N*M benchmark graphs per algorithm.
N=33 # number of k_in values
M=100 # number of repetitions per k_in value

# The two community detection methods under test, taken as unbound
# igraph.Graph methods so that they can be called as method(graph).
GN=igraph.Graph.community_edge_betweenness
N_fast=igraph.Graph.community_fastgreedy

# Each result array holds one row per k_in value and one column per repetition.
# krange is assigned by both calls; both use the same N and the default
# k_in limits, so the two assignments yield the same values.
# NOTE(review): no random seed is set in this notebook, so the numbers are
# presumably not reproducible between runs -- confirm how
# create_GN_benchmark_graph draws its random graphs.
GN_res,krange=scan_k_in_vals(GN,N=N,M=M)
N_fast_res,krange=scan_k_in_vals(N_fast,N=N,M=M)

### Plot the mean results
- Does not look like the figure in Newman 2003 

In [None]:
# Mean fraction of correctly classified vertices (mean over the repetitions,
# axis=1) for both algorithms, plotted against 16-k_in.
# NOTE(review): 16 is presumably the total degree of every vertex in the GN
# benchmark, which makes 16-k_in the number of inter-community edges per
# vertex -- confirm against create_GN_benchmark_graph.
fig,ax=plt.subplots()
fig.set_size_inches(12,9)
# The edge-betweenness curve used to be commented out and referred to an
# undefined GN_mean; its mean is computed from GN_res directly here so that
# the (expensive) GN results are actually shown next to the fastgreedy ones.
ax.plot(16-krange,np.mean(GN_res,axis=1),'o',c='dodgerblue',ms=10,mec='none',label='edge betweenness')
ax.plot(16-krange,np.mean(N_fast_res,axis=1),'o',c='salmon',ms=10,mec='none',label='fastgreedy')
# reference line: every vertex classified correctly
ax.axhline(1,linestyle='dashed',color='grey')
ax.set_ylim(0,1.1)
ax.legend(loc='lower left',fontsize=14)
ax.set_xlabel('average number of inter-community edges per vertex',fontsize=16)
# assigning the return value keeps the cell from echoing the label object
dump=ax.set_ylabel('fraction of nodes correctly assigned',fontsize=16)