# Speed and Quality of Katz-Eigen Community Detection vs Louvain

### Main Take-aways
 Network (nodes, edges)  | Louvain Speed | KE Speed | Louvain Q/Qnorm | KE Q/Qnorm  |
-----------------------  |:-------------:|:--------:|:---------------:|:-----------:|
Amazon Product (328,679) | 0.371 s       | 0.032 s  | 0.801/0.882     | 0.359/0.769 |
AdHoc BA (1000,16975)    | 11.8 s        | 0.329 s  | 0.485/0.988     | 0.480/0.964 |
AdHoc BA (2000,11035)    | 2 m 2 s       | 0.228 s  | 0.291/0.313     | 0.228/0.484 |
AdHoc BA (10000,95079)   | 2 h 4 m 12 s  | 1.97 s   | 0.203/0.218     | 0.123/0.250 |


In [1]:
import zen
import pandas as pd
import numpy as np
from clusteringAlgo import lineClustering

#### Compare the speed of the Katz-eigen plot method of community detection with that of Louvain community detection, using the 328-node Amazon product network.

In [2]:
def katz(G,tol=0.01,max_iter=1000,alpha=0.001,beta=1):
    """Estimate Katz centrality by repeated in-place sweeps over the nodes.

    In each sweep every node is set to
    ``alpha * sum(weight * centrality[neighbor]) + beta``. The update is done
    in place, so nodes visited later in a sweep already see the values that
    were refreshed earlier in the same sweep.

    Parameters
    ----------
    G : graph object exposing ``num_nodes``, ``nodes_()``,
        ``neighbors_(node)``, ``edge_idx_(u, v)`` and ``weight_(edge_idx)``.
    tol : iteration stops once the summed absolute change between two
        consecutive sweeps drops below this value.
    max_iter : maximum number of sweeps.
    alpha : factor applied to the weighted neighbor sum.
    beta : constant added to every node's value.

    Returns
    -------
    numpy array of length ``G.num_nodes``, indexed by node index.
    If ``tol`` is not reached within ``max_iter`` sweeps, a ``RuntimeWarning``
    is issued and the last iterate is returned, so callers always receive an
    array rather than ``None``.
    """
    import warnings

    centrality = np.zeros(G.num_nodes)
    for _ in range(max_iter):
        centrality_old = centrality.copy()

        for node in G.nodes_():
            Ax = 0
            for neighbor in G.neighbors_(node):
                # NOTE: an earlier variant dropped the weight factor here
                # because the multiplication overflowed.
                weight = G.weight_(G.edge_idx_(neighbor,node))
                Ax += centrality[neighbor] * weight

            centrality[node] = alpha * Ax + beta

        # Converged: total change over the whole sweep is below tolerance.
        if np.sum(np.abs(centrality - centrality_old)) < tol:
            return centrality

    warnings.warn(
        'katz() did not reach tol=%g within %d iterations; '
        'returning the last iterate' % (tol, max_iter),
        RuntimeWarning,
    )
    return centrality

In [3]:
def modular_graph(Size1, Size2, edges1, edges2, common, katz_alpha=0.001):
    """Build one graph out of two Barabasi-Albert subgraphs plus random bridges.

    The first subgraph occupies node objects ``0 .. Size1-1`` and the second
    ``Size1 .. Size1+Size2-1``. ``common`` random (first-half, second-half)
    node pairs are then drawn and connected; a pair that is already connected
    is skipped, so fewer than ``common`` bridge edges may be added.

    Parameters
    ----------
    Size1, Size2 : number of nodes in the first / second subgraph.
    edges1, edges2 : second argument passed to
        ``zen.generating.barabasi_albert`` for each subgraph.
    common : number of random cross-subgraph node pairs to draw.
    katz_alpha : not used by this function; kept only so existing calls that
        pass it continue to work.

    Returns
    -------
    The combined ``zen.Graph``.
    """
    # The original also computed average degree and mean local clustering
    # coefficient for both subgraphs but never used them; that dead (and
    # costly) work has been dropped.
    g1 = zen.generating.barabasi_albert(Size1,edges1)
    g2 = zen.generating.barabasi_albert(Size2,edges2)

    Size = Size1 + Size2
    G = zen.Graph()
    for i in range(Size):
        G.add_node(i)

    # Copy the first subgraph as-is and the second one shifted by Size1.
    for edge in g1.edges_iter():
        G.add_edge(edge[0],edge[1])

    for edge in g2.edges_iter():
        G.add_edge(edge[0]+Size1,edge[1]+Size1)

    # Select random pairs of nodes to connect the subgraphs
    nodes1 = np.random.randint(0,Size1,size=common)
    nodes2 = np.random.randint(Size1,Size,size=common)

    for u,v in zip(nodes1,nodes2):
        # Plain ints match the node objects that were added above.
        u = int(u)
        v = int(v)
        if not G.has_edge(u,v):
            G.add_edge(u,v)

    return G

In [4]:
def modularity(G,classDict,classList):
    """Return the modularity of a partition together with its upper bound.

    Parameters
    ----------
    G : graph whose ``edges()`` yields endpoint pairs accepted by
        ``G.node_idx``.
    classDict : community mapping handed to ``zen.algorithms.modularity``.
    classList : sequence giving the community label of each node index.

    Returns
    -------
    (Q, qmax) where ``Q`` is the value from ``zen.algorithms.modularity`` and
    ``qmax = 1 - (same - Q)``, with ``same`` the fraction of edges whose two
    endpoints share a label.
    """
    Q = zen.algorithms.modularity(G,classDict)

    # Number of edges that stay inside a single community.
    internal_edges = sum(
        1
        for endpoints in G.edges()
        if classList[G.node_idx(endpoints[0])] == classList[G.node_idx(endpoints[1])]
    )
    same_fraction = float(internal_edges) / G.num_edges

    # Q = same_fraction - expected, so the best achievable value
    # (same_fraction == 1) is 1 - expected.
    expected_fraction = same_fraction - Q
    return Q, 1 - expected_fraction

In [5]:
def ke_community_detection(G,dtheta=0.01,dx=0.5,window=10,katz_alpha=1e-4):
    """Detect communities from eigenvector vs. Katz centrality and print the score.

    Both centralities are min-max scaled to [0, 1] and passed to
    ``lineClustering``; the resulting clusters are scored with ``modularity``
    and the community count, Q and normalized Q are printed.

    Parameters
    ----------
    G : zen graph.
    dtheta, dx, window : forwarded unchanged to ``lineClustering``.
    katz_alpha : ``alpha`` passed to ``katz`` (default matches the value that
        used to be hard-coded here).

    Raises
    ------
    RuntimeError : if ``katz`` returns ``None`` (no converged result).
    """
    evc = zen.algorithms.eigenvector_centrality_(G)
    kc = katz(G,alpha=katz_alpha)
    if kc is None:
        # Fail with a clear message instead of a TypeError in the scaling below.
        raise RuntimeError('katz() returned no result (iteration did not converge)')

    # Min-max scale both measures.
    # NOTE(review): a constant vector makes the divisor 0 — confirm this
    # cannot happen for the graphs used here.
    evc = evc - np.min(evc)
    evc = evc / np.max(evc)
    kc  = kc - np.min(kc)
    kc = kc / np.max(kc)

    clusters = lineClustering(evc,kc,dtheta=dtheta,dx=dx,window=window)

    ClassDict = {}
    # Nodes that appear in no cluster keep label 0.
    ClassList = np.zeros(G.num_nodes)
    num_communities = 0
    for i,c in enumerate(clusters):
        ClassDict[i] = [G.node_object(x) for x in c]
        ClassList[c]=i
        num_communities = i + 1

    q,qmax = modularity(G,ClassDict,ClassList)
    # Counting explicitly avoids a NameError on `i` when no cluster is found;
    # single-argument parenthesized print behaves the same on Python 2 and 3.
    print('%d communities found.'%num_communities)
    print('Q:            %.3f'%q)
    print('Normalized Q: %.3f'%(q/qmax))

In [6]:
from zen.algorithms.community import louvain
def louvain_community_detection(G):
    """Run zen's Louvain community detection on ``G`` and print the score.

    Each community returned by ``louvain(G).communities()`` is recorded both
    as a list of node objects (for ``zen.algorithms.modularity``) and as a
    per-node label array; the community count, Q and normalized Q are printed.

    Parameters
    ----------
    G : zen graph.
    """
    cset = louvain(G)

    comm_dict = {}
    # Nodes that belong to no returned community keep label 0.
    comm_list = np.zeros(G.num_nodes)
    num_communities = 0
    for i,community in enumerate(cset.communities()):
        comm_dict[i] = community.nodes()
        comm_list[community.nodes_()] = i
        num_communities = i + 1

    q,qmax = modularity(G,comm_dict,comm_list)
    # Counting explicitly avoids a NameError on `i` when no community is
    # returned; single-argument parenthesized print behaves the same on
    # Python 2 and 3.
    print('%d communities found.'%num_communities)
    print('Q:            %.3f'%q)
    print('Normalized Q: %.3f'%(q/qmax))

### Test on Amazon Product Graph

In [9]:
# Load the Amazon product network from GML; edge weights are taken from the
# 'weight' entry of whatever zen passes to weight_fxn (presumably the edge's
# GML data — confirm against zen.io.gml.read).
G = zen.io.gml.read('amazon_product.gml',weight_fxn=lambda x: x['weight'])

In [10]:
%%time
# Time Katz-eigen community detection on the Amazon product graph.
ke_community_detection(G)

3 communities found.
Q:            0.359
Normalized Q: 0.769
CPU times: user 33.4 ms, sys: 1.63 ms, total: 35 ms
Wall time: 31.5 ms


In [11]:
%%time
# Time Louvain community detection on the same Amazon product graph.
louvain_community_detection(G)

14 communities found.
Q:            0.801
Normalized Q: 0.882
CPU times: user 370 ms, sys: 795 µs, total: 371 ms
Wall time: 371 ms


### Test on Synthetic Graphs

In [12]:
# Synthetic graph: two 500-node Barabasi-Albert halves (parameters 15 and 20)
# joined by up to 100 random cross edges.
# NOTE: katz_alpha is accepted by modular_graph but not used by its body.
G_synth = modular_graph(500,500,15,20,100,katz_alpha=1e-4)
print "Nodes: %d"%G_synth.num_nodes
print "Edges: %d"%G_synth.num_edges

Nodes: 1000
Edges: 16975


In [13]:
%%time
# Time Katz-eigen community detection on the 1000-node synthetic graph.
ke_community_detection(G_synth)

3 communities found.
Q:            0.480
Normalized Q: 0.964
CPU times: user 329 ms, sys: 31.2 ms, total: 360 ms
Wall time: 329 ms


In [14]:
%%time
# Time Louvain community detection on the 1000-node synthetic graph.
louvain_community_detection(G_synth)

2 communities found.
Q:            0.485
Normalized Q: 0.988
CPU times: user 11.8 s, sys: 0 ns, total: 11.8 s
Wall time: 11.8 s


In [18]:
# Synthetic graph: two 1000-node Barabasi-Albert halves (parameters 4 and 7)
# joined by up to 100 random cross edges.
# NOTE: katz_alpha is accepted by modular_graph but not used by its body.
G_synth = modular_graph(1000,1000,4,7,100,katz_alpha=1e-4)
print "Nodes: %d"%G_synth.num_nodes
print "Edges: %d"%G_synth.num_edges

Nodes: 2000
Edges: 11035


In [19]:
%%time
# Time Katz-eigen community detection on the 2000-node synthetic graph.
ke_community_detection(G_synth)

2 communities found.
Q:            0.228
Normalized Q: 0.484
CPU times: user 228 ms, sys: 14.8 ms, total: 243 ms
Wall time: 228 ms


In [20]:
%%time
# Time Louvain community detection on the 2000-node synthetic graph.
louvain_community_detection(G_synth)

17 communities found.
Q:            0.291
Normalized Q: 0.313
CPU times: user 2min 2s, sys: 1.14 ms, total: 2min 2s
Wall time: 2min 2s


In [7]:
# Synthetic graph: two 5000-node Barabasi-Albert halves (parameters 5 and 14)
# joined by up to 300 random cross edges.
# NOTE: katz_alpha is accepted by modular_graph but not used by its body.
G_synth = modular_graph(5000,5000,5,14,300,katz_alpha=1e-4)
print "Nodes: %d"%G_synth.num_nodes
print "Edges: %d"%G_synth.num_edges

Nodes: 10000
Edges: 95079


In [8]:
%%time
# Time Katz-eigen community detection on the 10000-node synthetic graph.
ke_community_detection(G_synth)

2 communities found.
Q:            0.123
Normalized Q: 0.250
CPU times: user 1.96 s, sys: 10.3 ms, total: 1.97 s
Wall time: 1.97 s


In [9]:
%%time
# Time Louvain community detection on the 10000-node synthetic graph.
louvain_community_detection(G_synth)

18 communities found.
Q:            0.203
Normalized Q: 0.218
CPU times: user 2h 3min 55s, sys: 1.2 s, total: 2h 3min 56s
Wall time: 2h 4min 12s
