In [1]:
import networkx as nx
import numpy as np
import pcabm.sc as sc
import pcabm.pcabm as pca
import pcabm.plem as plem
import pcabm.commFunc as cf
from sklearn.metrics.cluster import adjusted_rand_score

# Data Preprocessing

In [2]:
# Read data
# Load the blog network and build a binary, symmetric adjacency matrix with
# an empty diagonal (i.e. an undirected simple graph).
G = nx.read_gml('Data/polblogs.gml')
# toarray() always yields a plain ndarray. todense() returns np.matrix when
# the adjacency is a scipy sparse *matrix* (older networkx/scipy); rows of an
# np.matrix stay 2-D, which breaks the 1-D row indexing in the next cell.
A = nx.adjacency_matrix(G).toarray()
# A link in either direction counts as an undirected edge.
Ab = ((A + A.T) != 0).astype(int)
# Drop self-loops.
np.fill_diagonal(Ab, 0)

In [3]:
# Find the largest cluster
# Explore every connected component of the undirected graph and keep the
# biggest one. Seeding from every unvisited node means the result does not
# rely on node 0 happening to sit inside the giant component (and an isolated
# node 0 no longer produces an empty selection).
adj = np.asarray(Ab)  # plain ndarray view so adj[node] is a 1-D row
unvisited = set(range(adj.shape[0]))
largest = []
while unvisited:
    seed = unvisited.pop()
    component = [seed]
    frontier = [seed]
    while frontier:
        node = frontier.pop()
        for neighbour in np.flatnonzero(adj[node]):
            neighbour = int(neighbour)
            # Membership in `unvisited` doubles as the "not seen yet" test.
            if neighbour in unvisited:
                unvisited.remove(neighbour)
                component.append(neighbour)
                frontier.append(neighbour)
    if len(component) > len(largest):
        largest = component

# Sorted indices give a deterministic row order that follows the original
# node order; later cells index the node list with comm_set to build labels.
comm_set = np.array(sorted(largest), dtype=int)
# Restrict the adjacency matrix to the selected nodes.
Ab = np.array(adj[np.ix_(comm_set, comm_set)])

In [4]:
# Create Label
# The 'value' attribute of each selected node is used as its ground-truth
# community label. comm_set holds positions in G's node order, so the node
# list is materialised once instead of once per node.
node_names = list(G.nodes)
label = np.array([G.nodes[node_names[index]]['value'] for index in comm_set])

# Create Covariate
# k: number of communities, p: number of pairwise covariates.
k, p = 2, 1
# n follows the data instead of being hard-coded, so the cell stays correct
# if the selected component changes size.
n = Ab.shape[0]
# Column sums of the symmetric 0/1 adjacency matrix = node degrees.
degree = np.asarray(Ab).sum(axis=0)
# Single covariate: Z[i, j, 0] = log(degree_i * degree_j).
Z = np.zeros((n, n, p))
Z[:, :, 0] = np.log(np.outer(degree, degree))

# Spectral Clustering

In [5]:
# Baseline: spectral clustering on the raw adjacency matrix, 2 clusters.
print("Without regularization:")
# Only the third returned object (the fitted clustering model) is needed.
_, _, modelSC = sc.SC(Ab, 2)
ari_sc = adjusted_rand_score(modelSC.labels_, label)
print('ARI is', ari_sc)

Without regularization:
ARI is 0.07990011061571035


In [6]:
# Same comparison using the regularised variant sc.SC_r, 2 clusters.
print("With regularization:")
# Only the third returned object (the fitted clustering model) is needed.
_, _, modelSC_r = sc.SC_r(Ab, 2)
ari_sc_r = adjusted_rand_score(modelSC_r.labels_, label)
print('ARI is', ari_sc_r)

With regularization:
ARI is 0.09967800244946386


# PLEM

In [7]:
# Fit PLEM on the adjacency matrix with the pairwise covariate Z, 2 communities.
modelPLEM = plem.PLEM(Ab, Z, 2)
# NOTE(review): gt=label hands the true labels to fit(); presumably used only
# for monitoring, not for estimation -- confirm against pcabm.plem.
estPLEM, _ = modelPLEM.fit(gt=label)
ari_plem = adjusted_rand_score(estPLEM, label)
print('ARI is', ari_plem)

ARI is 0.8013154251090011


In [8]:
# Estimated covariate coefficient(s).
print(modelPLEM.gamma)
# NOTE(review): cf.Info presumably returns the information matrix for gamma,
# so that its inverse is the estimator's covariance -- confirm in commFunc.
FI = cf.Info(estPLEM, modelPLEM.gamma, Ab, Z)
# 1.96 is the 0.975 standard-normal quantile -> two-sided 95% interval.
z_crit = 1.96
half_width = z_crit*np.sqrt(np.diagonal(np.linalg.inv(FI)))
print('Lower: ', modelPLEM.gamma-half_width)
print('Upper: ', modelPLEM.gamma+half_width)

[1.00052433]
Lower:  [0.98985114]
Upper:  [1.01119752]
