In [16]:
import zen
import numpy as np
import matplotlib.pyplot as plt

In [17]:
def modularity(G,classDict,classList):
    """Return the modularity of a partition together with its upper bound.

    Parameters
    ----------
    G : graph object providing ``edges()``, ``node_idx()`` and ``num_edges``.
    classDict : partition description handed unchanged to
        ``zen.algorithms.modularity``.
    classList : sequence indexed by node index giving each node's class label.

    Returns
    -------
    (Q, qmax) : ``Q`` is the value reported by ``zen.algorithms.modularity``;
        ``qmax`` is ``1 - (same - Q)``, where ``same`` is the fraction of
        edges whose two endpoints carry the same label in ``classList``.
    """
    observed_q = zen.algorithms.modularity(G, classDict)

    # Count the edges that stay inside a single class.  The float() keeps the
    # division below a true division under Python 2 as well.
    intra_class_edges = float(sum(
        1 for edge in G.edges()
        if classList[G.node_idx(edge[0])] == classList[G.node_idx(edge[1])]
    ))
    same_fraction = intra_class_edges / G.num_edges

    # Q = same_fraction - expected_fraction, so the expected share of
    # intra-class edges is recovered by subtraction; the maximum modularity
    # is what Q would be if every edge were intra-class.
    expected_fraction = same_fraction - observed_q
    return observed_q, 1 - expected_fraction

In [18]:
def katz(G,tol=0.01,max_iter=1000,alpha=0.001,beta=1):
    """Estimate the Katz centrality of every node by fixed-point iteration.

    Each sweep visits every node index from ``G.nodes_()`` and sets its score
    to ``alpha * sum(weight * score[neighbor]) + beta``, where ``weight`` is
    the weight of the edge between the neighbor and the node.  Scores are
    updated in place, so nodes visited later in a sweep already see the new
    values of the nodes visited earlier in that same sweep.

    Parameters
    ----------
    G : graph object providing ``num_nodes``, ``nodes_()``, ``neighbors_()``,
        ``edge_idx_()`` and ``weight_()``.
        NOTE(review): the score array has length ``G.num_nodes`` and is
        indexed by node index, so node indices are assumed to lie in
        ``[0, num_nodes)`` -- confirm for graphs that had nodes removed.
    tol : a sweep is accepted once the summed absolute change of all scores
        is below this value.
    max_iter : maximum number of sweeps.
    alpha : attenuation factor applied to the weighted neighbor sum.
    beta : constant added to every node's score.

    Returns
    -------
    numpy.ndarray of length ``G.num_nodes`` holding the scores.  If ``tol``
    is not reached within ``max_iter`` sweeps, the last iterate is returned
    and a ``RuntimeWarning`` is issued (the function used to fall through
    and return ``None`` in that case, which crashed callers doing
    arithmetic on the result).
    """
    import warnings

    iteration = 0
    centrality = np.zeros(G.num_nodes)
    while iteration < max_iter:
        iteration += 1
        centrality_old = centrality.copy()

        for node in G.nodes_():
            weighted_sum = 0
            for neighbor in G.neighbors_(node):
                weight = G.weight_(G.edge_idx_(neighbor,node))
                weighted_sum += centrality[neighbor] * weight

            centrality[node] = alpha * weighted_sum + beta

        # L1 distance between consecutive sweeps is the convergence measure.
        if np.sum(np.abs(centrality - centrality_old)) < tol:
            return centrality

    # Not converged: hand back the best estimate instead of an implicit None,
    # but make the situation visible to the caller.
    warnings.warn(
        'katz did not converge to tol=%g within %d iterations; '
        'returning the last iterate' % (tol, max_iter),
        RuntimeWarning,
    )
    return centrality

In [19]:
# Load the graph stored in 'amazon_product.gml'.  zen calls weight_fxn with a
# record and uses the value returned here, i.e. the record's 'weight' entry.
def _weight_entry(record):
    return record['weight']

G = zen.io.gml.read('amazon_product.gml', weight_fxn=_weight_entry)

In [27]:
from zen.algorithms.community import spectral_modularity as spm
def spectral_community_detection(G,ke_plot=False):
    """Run spectral-modularity community detection on ``G`` and report quality.

    The communities come from ``spm`` (zen's ``spectral_modularity``).  The
    function prints the number of communities, the modularity ``Q`` and the
    normalized value ``Q / qmax`` as computed by ``modularity``.  It returns
    nothing.

    Parameters
    ----------
    G : graph passed to ``spm``, ``katz``, ``modularity`` and
        ``zen.algorithms.eigenvector_centrality_``; must expose ``num_nodes``.
    ke_plot : when true, additionally draw a scatter plot of min-max scaled
        eigenvector centrality (x axis) against min-max scaled Katz
        centrality (y axis), one colour/legend entry per community.
    """
    cset = spm(G)
    if ke_plot:
        evc = zen.algorithms.eigenvector_centrality_(G)
        kc = katz(G,alpha=1e-4)

        # Min-max scale both centralities to [0, 1] so they share one plot.
        evc = evc - np.min(evc)
        evc = evc / np.max(evc)
        kc  = kc - np.min(kc)
        kc = kc / np.max(kc)

    comm_dict = {}                          # community id -> node objects
    comm_list = np.zeros(G.num_nodes)       # node index   -> community id
    for i,community in enumerate(cset.communities()):
        member_idx = community.nodes_()
        comm_dict[i] = community.nodes()
        comm_list[member_idx] = i
        if ke_plot:
            plt.scatter(evc[member_idx],kc[member_idx],s=3,label='cluster %d'%i)
    if ke_plot:
        plt.xlabel('Eigenvector Centrality (normalized)')
        # Katz centrality is plotted on the y axis; this used to be a second
        # xlabel call that overwrote the label above and left y unlabelled.
        plt.ylabel('Katz Centrality (normalized)')
        plt.legend()
        plt.show()

    q,qmax = modularity(G,comm_dict,comm_list)
    # Use the dictionary size rather than the leaked loop variable so an empty
    # community set reports 0 instead of raising NameError.  The parenthesised
    # single-argument print form behaves the same on Python 2 and 3.
    print('%d communities found.' % len(comm_dict))
    print('Q:            %.3f' % q)
    print('Normalized Q: %.3f' % (q/qmax))

In [29]:
%%time
# Time a detection run on the loaded graph with the centrality scatter plot
# switched off (ke_plot=False is also the function's default).
spectral_community_detection(G, ke_plot=False)

17 communities found.
Q:            0.781
Normalized Q: 0.875
CPU times: user 720 ms, sys: 783 ms, total: 1.5 s
Wall time: 244 ms
