In [2]:
import numpy as np 
import networkx as nx
import cdlib as cd
from cdlib import algorithms, viz
from cdlib import evaluation
from cdlib import NodeClustering
from networkx.generators.community import LFR_benchmark_graph
from matplotlib import pyplot as plt

Note: to be able to use all crisp methods, you need to install some additional packages:  {'leidenalg', 'graph_tool', 'infomap', 'karateclub', 'wurlitzer'}
Note: to be able to use all overlapping methods, you need to install some additional packages:  {'ASLPAw', 'karateclub'}
Note: to be able to use all bipartite methods, you need to install some additional packages:  {'leidenalg', 'wurlitzer', 'infomap'}


In [4]:
# Load the pickled graph that every later cell analyses.
# Forward slashes keep this relative path valid on every OS; the previous
# backslash form only resolved on Windows and relied on the invalid escape
# sequences \d, \g and \p, which newer Python versions warn about.
# NOTE(review): a gpickle file is a pickle -- only load files you trust.
# NOTE(review): nx.read_gpickle was removed in NetworkX 3.0; on newer
# versions open the file in binary mode and use pickle.load instead.
tot_pos = nx.read_gpickle('../data_collection/graph/pos_graph_tot.gpickle')

In [None]:
# Community detection on tot_pos: label propagation.
coms_lp = algorithms.label_propagation(
    tot_pos,
)


In [None]:
# Community detection on tot_pos: Leiden.
coms_lei = algorithms.leiden(
    tot_pos,
)

In [None]:
# Community detection on tot_pos: Girvan-Newman.
# NOTE(review): level=30 is a hand-picked value; presumably it selects where
# the Girvan-Newman hierarchy is cut -- confirm against the cdlib docs.
coms_GN = algorithms.girvan_newman(
    tot_pos,
    level=30,
)

In [None]:
# Community detection on tot_pos: eigenvector method.
coms_eig = algorithms.eigenvector(
    tot_pos,
)

In [None]:
# Newman-Girvan modularity of each partition, printed in this order:
# label propagation, Leiden, Girvan-Newman, eigenvector.
for partition in (coms_lp, coms_lei, coms_GN, coms_eig):
    print(evaluation.newman_girvan_modularity(tot_pos, partition))

In [None]:
# Conductance of each partition on tot_pos.
cond_lp = evaluation.conductance(tot_pos, coms_lp)
cond_lei = evaluation.conductance(tot_pos, coms_lei)
cond_GN = evaluation.conductance(tot_pos, coms_GN)
# NOTE(review): only the eigenvector call passes summary=False, so its
# printed result presumably has a different form from the other three --
# confirm this asymmetry is intentional.
cond_eig = evaluation.conductance(tot_pos, coms_eig, summary=False)

# Print in the same order as before: LP, Leiden, GN, eigenvector.
for score in (cond_lp, cond_lei, cond_GN, cond_eig):
    print(score)

In [None]:
# Normalized mutual information between every pair of partitions.
# One line is printed per pair, in the order listed here.
nmi_pairs = [
    (coms_lei, coms_eig),
    (coms_lei, coms_GN),
    (coms_lei, coms_lp),
    (coms_GN, coms_eig),
    (coms_lp, coms_GN),
    (coms_eig, coms_lp),
]
for first, second in nmi_pairs:
    print(evaluation.normalized_mutual_information(first, second))

In [None]:
# Compute a single spring layout and keep it in `pos`; the cluster plots
# below all receive this same layout, so node positions are comparable
# between algorithms. A fixed seed makes the layout reproducible across
# kernel restarts (spring_layout starts from random positions otherwise).
pos = nx.spring_layout(tot_pos, k=0.5, seed=42)

# No explicit plt.figure() here: plot_network_clusters opens its own figure
# sized by `figsize`, so a figure created beforehand would only be rendered
# as an extra blank output.
viz.plot_network_clusters(tot_pos, coms_eig, pos, figsize=(20, 20), plot_labels=False)

In [None]:
# Draw tot_pos with the Girvan-Newman partition, reusing the layout `pos`.
viz.plot_network_clusters(
    tot_pos,
    coms_GN,
    pos,
    figsize=(20, 20),
    plot_labels=False,
)

In [None]:
# Draw tot_pos with the label-propagation partition, reusing the layout `pos`.
viz.plot_network_clusters(
    tot_pos,
    coms_lp,
    pos,
    figsize=(20, 20),
    plot_labels=False,
)
# Same plot for the Leiden partition.
viz.plot_network_clusters(
    tot_pos,
    coms_lei,
    pos,
    figsize=(20, 20),
    plot_labels=False,
)

In [None]:

# Community-level graph for each partition: eigenvector, Girvan-Newman,
# label propagation, then Leiden.
for clustering in (coms_eig, coms_GN, coms_lp):
    viz.plot_community_graph(tot_pos, clustering, figsize=(10, 10))
# The Leiden call stays the cell's final expression, as it was originally.
viz.plot_community_graph(tot_pos, coms_lei, figsize=(10, 10))
# In hindsight, these plots turned out not to be very informative.

In [None]:
# Partitions compared in the first two plots. Label propagation is only
# added for the similarity matrix, matching the original calls.
compared = [coms_eig, coms_GN, coms_lei]

# Internal edge density per partition.
viz.plot_com_stat(compared, evaluation.internal_edge_density)
# Community size against internal edge density.
viz.plot_com_properties_relation(
    compared,
    evaluation.size,
    evaluation.internal_edge_density,
)
# Pairwise adjusted mutual information between all four partitions.
viz.plot_sim_matrix(compared + [coms_lp], evaluation.adjusted_mutual_information)

In [None]:
# Ground-truth evaluation: load the saved array and unwrap the single Python
# object it holds (later cells index it by the key '2022').
# NOTE(review): allow_pickle=True unpickles the file contents -- only load
# files from a trusted source.
gt_archive = np.load('list_pos_graph_label.npy', allow_pickle=True)
list_gt = gt_archive.item()

In [None]:
# Graph stored under the '2022' key of the ground-truth data.
n = list_gt["2022"]
# Mapping node -> value of its 'label' attribute.
sector = nx.get_node_attributes(G=n, name="label")

In [None]:

# Build the ground-truth clustering: one community per distinct 'label'
# value found on the nodes of the 2022 graph.
gt_graph = list_gt["2022"]

# Group nodes by the label they actually carry instead of matching against a
# hard-coded list of sector names. The previous if/elif chain ended in a
# catch-all `else`, so any label it did not spell exactly (a typo, a renamed
# sector, an unexpected value) was silently merged into the
# communication-services community; it also passed empty lists as
# communities for sectors with no node in the graph.
members_by_sector = {}
for node, attrs in gt_graph.nodes.data():
    # A node without a 'label' attribute raises KeyError, as it did before.
    members_by_sector.setdefault(attrs['label'], []).append(node)

# One list of nodes per label, in order of first appearance in the graph.
communities_GT = list(members_by_sector.values())

# NOTE(review): this clustering is attached to the 2022 graph while the
# detected partitions were computed on tot_pos -- confirm both graphs share
# the same node set before comparing them.
coms_GT = NodeClustering(communities_GT, graph=gt_graph, method_name="Ground Truth")

In [None]:
# Evaluation against the ground truth: adjusted mutual information between
# the eigenvector partition and coms_GT.
ami_eig_vs_gt = evaluation.adjusted_mutual_information(coms_eig, coms_GT)
ami_eig_vs_gt

In [None]:
# Adjusted mutual information between the Girvan-Newman partition and coms_GT.
ami_gn_vs_gt = evaluation.adjusted_mutual_information(coms_GN, coms_GT)
ami_gn_vs_gt

In [None]:
# Adjusted mutual information of the Leiden and label-propagation partitions
# against coms_GT. A notebook cell only displays its last expression, so both
# scores are printed explicitly; previously the Leiden score was computed and
# then discarded without ever being shown.
print(evaluation.adjusted_mutual_information(coms_lei, coms_GT))
print(evaluation.adjusted_mutual_information(coms_lp, coms_GT))