In [1]:
# Contestant Techniques 
from sklearn.cluster import AgglomerativeClustering
import numpy
from igraph import *
import igraph
import random
import numpy as np
import pandas as pd
from sklearn.metrics.cluster import adjusted_rand_score

# Directory holding the GraphN.dat / GraphN.truth.dat / GraphN.vertices.dat inputs;
# the result CSVs are written into the same directory.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
path = "/Users/mcdicjh2/Desktop/gecco22/"

# Column layout shared by every result table written by this cell.
RESULT_COLUMNS = ['k','m','m1','m2','ari']


def _average_linkage_on_distances(n_clusters):
    """Build an average-linkage AgglomerativeClustering for a precomputed distance matrix.

    scikit-learn renamed the ``affinity`` keyword to ``metric`` (deprecated in 1.2,
    removed in 1.4), so try the new spelling first and fall back to the old one.
    An unknown keyword makes the constructor raise TypeError.
    """
    try:
        return AgglomerativeClustering(n_clusters=n_clusters, linkage='average',
                                       metric='precomputed')
    except TypeError:
        return AgglomerativeClustering(n_clusters=n_clusters, linkage='average',
                                       affinity='precomputed')


def _score_partition(k, membership, g3, g1, g2, lower, groundtruth):
    """Return one result row ``(k, m, m1, m2, ari)`` for a partition of g3's vertices.

    membership  -- one cluster label per vertex of g3, in vertex order
    g3          -- the bipartite graph; ``m`` is its (unweighted) modularity
    g1, g2      -- the two projections; ``m1``/``m2`` are their modularities using the
                   'weight' edge attribute, scored on the first ``lower`` labels and
                   on the remaining labels respectively
    groundtruth -- reference labels for the adjusted Rand index
    """
    membership = list(membership)
    m3 = g3.modularity(membership)
    m1 = g1.modularity(membership[:lower], weights=g1.es['weight'])
    m2 = g2.modularity(membership[lower:], weights=g2.es['weight'])
    ari = adjusted_rand_score(groundtruth, membership)
    return k, m3, m1, m2, ari


# NOTE(review): no random seed is set, so the multilevel results may differ between
# runs -- confirm whether that is intended.
for it in range(0,30):

    # Read in graph and associated data (second CSV column holds the value of interest)
    prefix = path+"Graph"+str(it)
    g_org = Graph.Read_Edgelist(prefix+".dat")
    truth = pd.read_csv(prefix+".truth.dat", sep=',',header=None)
    groundtruth = truth.iloc[:, 1].values.tolist()
    v = pd.read_csv(prefix+".vertices.dat", sep=',',header=None)
    vertices = v.iloc[:, 1].values.tolist()
    edges = g_org.get_edgelist()

    # Generate bipartite graph; `vertices` holds the 0/1 type of every vertex.
    # Everything below assumes the `lower` type-0 vertices occupy indices
    # 0..lower-1 and the type-1 vertices the rest -- TODO confirm for the inputs.
    g3 = Graph.Bipartite(vertices, edges)
    n_var = len(vertices)
    lower = vertices.count(0)

    c = g3.clusters()  # Expected to be a single connected component
    g1, g2 = g3.bipartite_projection(multiplicity=True)

    # Run the multilevel community detection on each projection separately;
    # the communities of the two projections are merged by hand below.
    test1 = g1.community_multilevel()
    test2 = g2.community_multilevel()

    # Cluster assignment from each projection, concatenated in vertex order
    assignment = test1.membership+test2.membership

    k1 = max(test1.membership)+1
    k2 = max(test2.membership)+1

    # Count the bipartite edges running between every pair of communities.
    # Rows/columns 0..k1-1 are g1's communities, k1..k1+k2-1 are g2's.
    # Assumes every edge is stored as (type-0 vertex, type-1 vertex).
    d = numpy.zeros(shape=(k1+k2,k1+k2))
    for first, second in edges:
        index1 = assignment[first]
        index2 = k1+assignment[second]
        d[index1][index2] += 1
        d[index2][index1] += 1

    # Turn the counts into a dissimilarity: more shared edges -> smaller distance,
    # with a zero diagonal.
    d = 1.0/(1.0+d)
    numpy.fill_diagonal(d, 0)

    # Row of `d` that each vertex's community occupies (loop-invariant, so built once).
    community_of_vertex = np.array(assignment, dtype=int)
    community_of_vertex[lower:] += k1

    multilevel_result = pd.DataFrame(columns=RESULT_COLUMNS)
    for ctr, k in enumerate(range(len(c),k1+k2)):

        # Run hierarchical clustering on communities, then push the merged
        # community labels back down to the individual vertices.
        clustering = _average_linkage_on_distances(k).fit(d)
        labels = clustering.labels_
        newlabels = labels[community_of_vertex]

        multilevel_result.loc[ctr] = _score_partition(
            k, newlabels.tolist(), g3, g1, g2, lower, groundtruth)


    # Run FastGreedy, Community Edge Betweenness and Walktrap for comparison

    communityedge_result = pd.DataFrame(columns=RESULT_COLUMNS)
    communityedge_res = g3.community_edge_betweenness()

    walktrap_result = pd.DataFrame(columns=RESULT_COLUMNS)
    walktrap_res = g3.community_walktrap()

    fastgreedy_result = pd.DataFrame(columns=RESULT_COLUMNS)
    fastgreedy_res = g3.community_fastgreedy()

    # Each dendrogram is cut at k clusters and scored into its own table;
    # rows are indexed by k.
    baselines = (
        (fastgreedy_res, fastgreedy_result),
        (communityedge_res, communityedge_result),
        (walktrap_res, walktrap_result),
    )
    for k in range(len(c),min(len(c)+15,n_var)):
        for dendrogram, result in baselines:
            test = dendrogram.as_clustering(k)
            result.loc[k] = _score_partition(
                k, test.membership, g3, g1, g2, lower, groundtruth)

    multilevel_result.to_csv(prefix+".multilevel.csv")
    communityedge_result.to_csv(prefix+".communityedge.csv")
    walktrap_result.to_csv(prefix+".walktrap.csv")
    fastgreedy_result.to_csv(prefix+".fastgreedy.csv")







In [2]:
# Yet another contestant technique
# Use of BRIM package for comparison

import condor
import pandas as pd


for it in range(0,30):

    # Load the edge list, the reference labels and the per-vertex 0/1 types
    # for benchmark graph number `it` (second CSV column holds the value used).
    stem = f"{path}Graph{it}"
    g_org = Graph.Read_Edgelist(f"{stem}.dat")
    truth = pd.read_csv(f"{stem}.truth.dat", sep=',', header=None)
    groundtruth = truth.iloc[:, 1].values.tolist()
    v = pd.read_csv(f"{stem}.vertices.dat", sep=',', header=None)
    vertices = v.iloc[:, 1].values.tolist()
    edges = g_org.get_edgelist()

    # Bipartite graph plus its two weighted one-mode projections.
    g3 = Graph.Bipartite(vertices, edges)
    n_var = len(vertices)
    lower = vertices.count(0)
    g1, g2 = g3.bipartite_projection(multiplicity=True)

    # condor takes the edge list as a two-column DataFrame.
    el = g3.get_edgelist()
    net = pd.DataFrame(el)

    brim_result = pd.DataFrame(columns=['k','ari','m1','m2','m'])

    for i in range (0,1):

        # Build the condor object, seed it with an initial community structure,
        # then refine with BRIM.
        co = condor.brim(condor.initial_community(condor.condor_object(net)))

        # Split the reference labels at the number of type-0 vertices.
        n_l = vertices.count(0)
        groundtruth1, groundtruth2 = groundtruth[0:n_l], groundtruth[n_l:n_var]

        # Community column of each membership table as a plain list.
        # "tar" memberships are paired with the first n_l vertices / g1,
        # "reg" memberships with the remaining vertices / g2.
        # NOTE(review): this assumes the rows of tar_memb/reg_memb follow
        # vertex-id order -- confirm against condor's output ordering.
        output1 = co["reg_memb"]["com"].tolist()
        output2 = co["tar_memb"]["com"].tolist()
        output3 = output2+output1

        # ARI per vertex set and over all vertices.
        ari = adjusted_rand_score(groundtruth1,output2)
        ari2 = adjusted_rand_score(groundtruth2,output1)
        ari3 = adjusted_rand_score(groundtruth,output3)

        # Number of communities = highest label + 1.
        k = (max(output3)+1)
        print(k,ari,ari2,ari3)

        # Modularity on the bipartite graph and on each weighted projection.
        m = g3.modularity(output3)
        m1 = g1.modularity(output2,weights=g1.es['weight'])
        m2 = g2.modularity(output1,weights=g2.es['weight'])

        brim_result.loc[i] = k, ari3, m1, m2, m

    brim_result.to_csv(f"{stem}.brim.csv")


Unweighted network. Weights initialized as 1.
Condor object built in 0.0030732154846191406
Initial community structure computed in  0.0007450580596923828 . Modularity =  0.5091625
Matrices computed in 0.0012791156768798828
0.5308999999999999
0.5310499999999999
0.5310499999999999
10 0.10201912858660998 0.07981148506736337 0.0942790024764206
Unweighted network. Weights initialized as 1.
Condor object built in 0.003574848175048828
Initial community structure computed in  0.0010211467742919922 . Modularity =  0.49841250000000004
Matrices computed in 0.0015869140625
0.50985
0.50985
9 0.07890051983194475 0.05576503708673996 0.07313085335253808
Unweighted network. Weights initialized as 1.
Condor object built in 0.0023589134216308594
Initial community structure computed in  0.001104116439819336 . Modularity =  0.4892875000000001
Matrices computed in 0.0015468597412109375
0.50885
0.50915
0.50915
8 0.0333796940194715 0.012141565777401993 0.028402190525042958
Unweighted network. Weights initiali