In [39]:
import networkx as nx
import numpy as np
import pcabm.sc as sc
import pcabm.pcabm as pca
from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.metrics import normalized_mutual_info_score

# Data Preprocessing

In [2]:
# Load the political-blogs graph and derive a symmetric 0/1 adjacency matrix.
G = nx.read_gml('Data/polblogs/polblogs.gml')
A = nx.adjacency_matrix(G).todense()
# An entry is 1 when an edge exists in either direction between the two nodes.
linked_either_way = (A + A.T) != 0
Ab = linked_either_way.astype(int)
# Drop self-loops.
np.fill_diagonal(Ab, 0)

In [3]:
# Find the largest connected component.
# Every component is explored (not only the one containing node 0), so the
# selection is correct even when node 0 is isolated or sits in a small component.
# NOTE: Ab is overwritten at the end of this cell; re-running it without first
# re-running the data-loading cell makes comm_set index the reduced matrix
# instead of the nodes of G.
adjacency = np.asarray(Ab)  # plain 2-D ndarray whether Ab is np.matrix or ndarray
n_nodes = adjacency.shape[0]
visited = np.zeros(n_nodes, dtype=bool)
largest_component = []
for seed in range(n_nodes):
    if visited[seed]:
        continue
    # Depth-first traversal of the component containing `seed`.
    visited[seed] = True
    component = [seed]
    frontier = [seed]
    while frontier:
        node = frontier.pop()
        for neighbor in np.flatnonzero(adjacency[node]):
            if not visited[neighbor]:
                visited[neighbor] = True
                component.append(neighbor)
                frontier.append(neighbor)
    if len(component) > len(largest_component):
        largest_component = component
# Sorted indices give a deterministic node order for the label lookup downstream.
comm_set = np.array(sorted(largest_component), dtype=int)
# Restrict the adjacency matrix to the selected nodes (rows and columns).
Ab = np.array(adjacency[np.ix_(comm_set, comm_set)])

In [5]:
# Create Label
# G.nodes(data=True) yields (node, attribute-dict) pairs in node order; it is
# materialised once rather than rebuilding the key list on every iteration, and
# it avoids the `G.node` attribute that newer networkx releases no longer provide.
node_records = list(G.nodes(data=True))
label = np.array([node_records[index][1]['value'] for index in comm_set])

# Create Covariate
# k: number of communities, p: number of pairwise covariates,
# n: number of nodes, taken from Ab instead of being hard-coded.
k, p = 2, 1
n = Ab.shape[0]
# Column sums of the 0/1 adjacency matrix, i.e. the node degrees.
degree = np.asarray(Ab).sum(axis=0)
# Z[i, j, 0] = log(degree[i] * degree[j]) for every node pair.
Z = np.zeros((n, n, p))
Z[:, :, 0] = np.log(np.outer(degree, degree))

# Spectral Clustering

In [40]:
# Fit spectral clustering with 2 communities on the reduced adjacency matrix
# and score its labels against the reference labels.
modelSC = sc.SC(Ab, 2)
sc_ari = adjusted_rand_score(modelSC.labels_, label)
sc_nmi = normalized_mutual_info_score(modelSC.labels_, label)
print('ARI is', sc_ari)
print('NMI is', sc_nmi)

ARI is 0.07990011061571035
NMI is 0.18691875458682788


# PCABM

In [38]:
# Fit PCABM with 2 communities, starting from the spectral-clustering labels.
# `gt=label` passes the reference labels to fit(); the ARI/NMI lines shown in
# the cell output presumably come from fit() itself — confirm in pcabm.pcabm.
search_options = dict(tabu_size=300, max_iterations=30000, max_stay=1500)
modelPCA = pca.PCABM(Ab, Z, 2)
estPCA, nll = modelPCA.fit(
    community=modelSC.labels_,
    gt=label,
    **search_options
)

ARI is 0.8130899775528108
NMI is 0.7223139311594611
